From a4c3791ae0c4a2e1f5d91f078bc9ad6e5d58867e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 3 Feb 2020 14:20:44 +1100 Subject: [PATCH 01/20] spapr/rtas: Print message from "ibm,os-term" The "ibm,os-term" RTAS call has a single parameter which is a pointer to a message from the guest kernel about the termination cause; this prints it. Signed-off-by: Alexey Kardashevskiy Message-Id: <20200203032044.118585-1-aik@ozlabs.ru> Reviewed-by: Daniel Henrique Barboza Signed-off-by: David Gibson --- hw/ppc/spapr_rtas.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 883fe28465..656fdd2216 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -345,6 +345,13 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, target_ulong args, uint32_t nret, target_ulong rets) { + target_ulong msgaddr = rtas_ld(args, 0); + char msg[512]; + + cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1); + msg[sizeof(msg) - 1] = 0; + + error_report("OS terminated: %s", msg); qemu_system_guest_panicked(NULL); rtas_st(rets, 0, RTAS_OUT_SUCCESS); From 2c6e918ef8a101d86e0592051a1ee4bf493a6785 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 6 Feb 2020 00:20:15 +0100 Subject: [PATCH 02/20] qtest: Fix rtas dependencies qtest "rtas" command is only available with pseries not all ppc64 targets, so if I try to compile only powernv machine, the build fails with: /usr/bin/ld: qtest.o: in function `qtest_process_command': .../qtest.c:645: undefined reference to `qtest_rtas_call' We fix this by enabling rtas command only with pseries machine. Fixes: eeddd59f5962 ("tests: add RTAS command in the protocol") Signed-off-by: Laurent Vivier Message-Id: <20200205232016.588202-2-lvivier@redhat.com> Signed-off-by: David Gibson --- qtest.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qtest.c b/qtest.c index 12432f99cf..587dcbb4b5 100644 --- a/qtest.c +++ b/qtest.c @@ -27,7 +27,8 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qemu/cutils.h" -#ifdef TARGET_PPC64 +#include "config-devices.h" +#ifdef CONFIG_PSERIES #include "hw/ppc/spapr_rtas.h" #endif @@ -628,7 +629,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words) #else qtest_sendf(chr, "OK little\n"); #endif -#ifdef TARGET_PPC64 +#ifdef CONFIG_PSERIES } else if (strcmp(words[0], "rtas") == 0) { uint64_t res, args, ret; unsigned long nargs, nret; From 90118a657ce88a003f20f24ca65db96bba8be7a9 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 6 Feb 2020 00:20:16 +0100 Subject: [PATCH 03/20] ppc/pnv: Fix PCI_EXPRESS dependency When PHB4 bridge has been added, the dependencies to PCIE_PORT has been added to XIVE_SPAPR and indirectly to PSERIES. The build of the PowerNV machine is fine while we also build the PSERIES machine. If we disable the PSERIES machine, the PowerNV build fails because the PCI Express files are not built: /usr/bin/ld: hw/ppc/pnv.o: in function `pnv_chip_power8_pic_print_info': .../hw/ppc/pnv.c:623: undefined reference to `pnv_phb3_msi_pic_print_info' /usr/bin/ld: hw/ppc/pnv.o: in function `pnv_chip_power9_pic_print_info': .../hw/ppc/pnv.c:639: undefined reference to `pnv_phb4_pic_print_info' /usr/bin/ld: ../hw/usb/hcd-ehci-pci.o: in function `usb_ehci_pci_write_config': .../hw/usb/hcd-ehci-pci.c:129: undefined reference to `pci_default_write_config' /usr/bin/ld: ../hw/usb/hcd-ehci-pci.o: in function `usb_ehci_pci_realize': .../hw/usb/hcd-ehci-pci.c:68: undefined reference to `pci_allocate_irq' /usr/bin/ld: .../hw/usb/hcd-ehci-pci.c:72: undefined reference to `pci_register_bar' /usr/bin/ld: ../hw/usb/hcd-ehci-pci.o:(.data.rel+0x50): undefined reference to `vmstate_pci_device' This patch fixes the problem by adding needed dependencies to POWERNV. Fixes: 4f9924c4d4cf ("ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge") Signed-off-by: Laurent Vivier Message-Id: <20200205232016.588202-3-lvivier@redhat.com> Signed-off-by: David Gibson --- hw/ppc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index 354828bf13..dd86e664d2 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -29,6 +29,8 @@ config POWERNV select XICS select XIVE select FDT_PPC + select PCI_EXPRESS + select MSI_NONBROKEN config PPC405 bool @@ -135,8 +137,6 @@ config XIVE_SPAPR default y depends on PSERIES select XIVE - select PCI - select PCIE_PORT config XIVE_KVM bool From a78492681995ad4b46a2b6e30bb36935dff5ffb1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 7 Feb 2020 01:46:37 -0500 Subject: [PATCH 04/20] ppc: function to setup latest class options We are going to add more init for the latest machine, so move the setup to a function so we don't have to change the DEFINE_SPAPR_MACHINE macro each time. Signed-off-by: Michael S. Tsirkin Message-Id: <20200207064628.1196095-1-mst@redhat.com> Reviewed-by: Laurent Vivier Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/spapr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index c9b2e0a5e0..691c391060 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4485,6 +4485,12 @@ static const TypeInfo spapr_machine_info = { }, }; +static void spapr_machine_latest_class_options(MachineClass *mc) +{ + mc->alias = "pseries"; + mc->is_default = 1; +} + #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \ static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \ void *data) \ @@ -4492,8 +4498,7 @@ static const TypeInfo spapr_machine_info = { MachineClass *mc = MACHINE_CLASS(oc); \ spapr_machine_##suffix##_class_options(mc); \ if (latest) { \ - mc->alias = "pseries"; \ - mc->is_default = 1; \ + spapr_machine_latest_class_options(mc); \ } \ } \ static const TypeInfo spapr_machine_##suffix##_info = { \ From 3f350f6bb36233be50fc2bc18dc78b6a948a5dbe Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Sun, 9 Feb 2020 22:56:02 -0600 Subject: [PATCH 05/20] mem: move nvdimm_device_list to utilities nvdimm_device_list is required for parsing the list for devices in subsequent patches. Move it to common utility area. Signed-off-by: Shivaprasad G Bhat Reviewed-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <158131055857.2897.15658377276504711773.stgit@lep8c.aus.stglabs.ibm.com> Signed-off-by: David Gibson --- hw/acpi/nvdimm.c | 28 +--------------------------- include/qemu/nvdimm-utils.h | 7 +++++++ util/Makefile.objs | 1 + util/nvdimm-utils.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 27 deletions(-) create mode 100644 include/qemu/nvdimm-utils.h create mode 100644 util/nvdimm-utils.c diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 9fdad6dc3f..5219dd0e2e 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -32,33 +32,7 @@ #include "hw/acpi/bios-linker-loader.h" #include "hw/nvram/fw_cfg.h" #include "hw/mem/nvdimm.h" - -static int nvdimm_device_list(Object *obj, void *opaque) -{ - GSList **list = opaque; - - if (object_dynamic_cast(obj, TYPE_NVDIMM)) { - *list = g_slist_append(*list, DEVICE(obj)); - } - - object_child_foreach(obj, nvdimm_device_list, opaque); - return 0; -} - -/* - * inquire NVDIMM devices and link them into the list which is - * returned to the caller. - * - * Note: it is the caller's responsibility to free the list to avoid - * memory leak. - */ -static GSList *nvdimm_get_device_list(void) -{ - GSList *list = NULL; - - object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list); - return list; -} +#include "qemu/nvdimm-utils.h" #define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ diff --git a/include/qemu/nvdimm-utils.h b/include/qemu/nvdimm-utils.h new file mode 100644 index 0000000000..4b8b198ba7 --- /dev/null +++ b/include/qemu/nvdimm-utils.h @@ -0,0 +1,7 @@ +#ifndef NVDIMM_UTILS_H +#define NVDIMM_UTILS_H + +#include "qemu/osdep.h" + +GSList *nvdimm_get_device_list(void); +#endif diff --git a/util/Makefile.objs b/util/Makefile.objs index 11262aafaf..6b38b67cf1 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -20,6 +20,7 @@ util-obj-y += envlist.o path.o module.o util-obj-y += host-utils.o util-obj-y += bitmap.o bitops.o hbitmap.o util-obj-y += fifo8.o +util-obj-y += nvdimm-utils.o util-obj-y += cacheinfo.o util-obj-y += error.o qemu-error.o util-obj-y += qemu-print.o diff --git a/util/nvdimm-utils.c b/util/nvdimm-utils.c new file mode 100644 index 0000000000..5cc768ca47 --- /dev/null +++ b/util/nvdimm-utils.c @@ -0,0 +1,29 @@ +#include "qemu/nvdimm-utils.h" +#include "hw/mem/nvdimm.h" + +static int nvdimm_device_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_NVDIMM)) { + *list = g_slist_append(*list, DEVICE(obj)); + } + + object_child_foreach(obj, nvdimm_device_list, opaque); + return 0; +} + +/* + * inquire NVDIMM devices and link them into the list which is + * returned to the caller. + * + * Note: it is the caller's responsibility to free the list to avoid + * memory leak. + */ +GSList *nvdimm_get_device_list(void) +{ + GSList *list = NULL; + + object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list); + return list; +} From 6c5627bb24dcd68c997857a8b671617333b1289f Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Sun, 9 Feb 2020 22:56:13 -0600 Subject: [PATCH 06/20] nvdimm: add uuid property to nvdimm For ppc64, PAPR requires the nvdimm device to have UUID property set in the device tree. Add an option to get it from the user. Signed-off-by: Shivaprasad G Bhat Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Message-Id: <158131056931.2897.14057087440721445976.stgit@lep8c.aus.stglabs.ibm.com> Signed-off-by: David Gibson --- hw/mem/nvdimm.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/hw/mem/nvdimm.h | 7 +++++++ 2 files changed, 47 insertions(+) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 39f1426d1f..8e426d24bb 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -69,11 +69,51 @@ out: error_propagate(errp, local_err); } +static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + char *value = NULL; + + value = qemu_uuid_unparse_strdup(&nvdimm->uuid); + + visit_type_str(v, name, &value, errp); + g_free(value); +} + + +static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + Error *local_err = NULL; + char *value; + + visit_type_str(v, name, &value, &local_err); + if (local_err) { + goto out; + } + + if (qemu_uuid_parse(value, &nvdimm->uuid) != 0) { + error_setg(errp, "Property '%s.%s' has invalid value", + object_get_typename(obj), name); + goto out; + } + g_free(value); + +out: + error_propagate(errp, local_err); +} + + static void nvdimm_init(Object *obj) { object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int", nvdimm_get_label_size, nvdimm_set_label_size, NULL, NULL, NULL); + + object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid, + nvdimm_set_uuid, NULL, NULL, NULL); } static void nvdimm_finalize(Object *obj) diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 523a9b3d4a..4807ca615b 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -25,6 +25,7 @@ #include "hw/mem/pc-dimm.h" #include "hw/acpi/bios-linker-loader.h" +#include "qemu/uuid.h" #define NVDIMM_DEBUG 0 #define nvdimm_debug(fmt, ...) \ @@ -49,6 +50,7 @@ TYPE_NVDIMM) #define NVDIMM_LABEL_SIZE_PROP "label-size" +#define NVDIMM_UUID_PROP "uuid" #define NVDIMM_UNARMED_PROP "unarmed" struct NVDIMMDevice { @@ -83,6 +85,11 @@ struct NVDIMMDevice { * the guest write persistence. */ bool unarmed; + + /* + * The PPC64 - spapr requires each nvdimm device have a uuid. + */ + QemuUUID uuid; }; typedef struct NVDIMMDevice NVDIMMDevice; From ee3a71e36654317b14ede0290e87628f8b79f850 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Sun, 9 Feb 2020 22:56:31 -0600 Subject: [PATCH 07/20] spapr: Add NVDIMM device support Add support for NVDIMM devices for sPAPR. Piggyback on existing nvdimm device interface in QEMU to support virtual NVDIMM devices for Power. Create the required DT entries for the device (some entries have dummy values right now). The patch creates the required DT node and sends a hotplug interrupt to the guest. Guest is expected to undertake the normal DR resource add path in response and start issuing PAPR SCM hcalls. The device support is verified based on the machine version unlike x86. This is how it can be used .. Ex : For coldplug, the device to be added in qemu command line as shown below -object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm0,share=yes,size=1073872896 -device nvdimm,label-size=128k,uuid=75a3cdd7-6a2f-4791-8d15-fe0a920e8e9e,memdev=memnvdimm0,id=nvdimm0,slot=0 For hotplug, the device to be added from monitor as below object_add memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm0,share=yes,size=1073872896 device_add nvdimm,label-size=128k,uuid=75a3cdd7-6a2f-4791-8d15-fe0a920e8e9e,memdev=memnvdimm0,id=nvdimm0,slot=0 Signed-off-by: Shivaprasad G Bhat Signed-off-by: Bharata B Rao [Early implementation] Message-Id: <158131058078.2897.12767731856697459923.stgit@lep8c.aus.stglabs.ibm.com> Signed-off-by: David Gibson --- default-configs/ppc64-softmmu.mak | 1 + hw/mem/Kconfig | 2 +- hw/ppc/Makefile.objs | 2 +- hw/ppc/spapr.c | 69 ++++++++++-- hw/ppc/spapr_drc.c | 19 ++++ hw/ppc/spapr_events.c | 4 + hw/ppc/spapr_nvdimm.c | 177 ++++++++++++++++++++++++++++++ include/hw/ppc/spapr_drc.h | 9 ++ include/hw/ppc/spapr_nvdimm.h | 37 +++++++ 9 files changed, 309 insertions(+), 11 deletions(-) create mode 100644 hw/ppc/spapr_nvdimm.c create mode 100644 include/hw/ppc/spapr_nvdimm.h diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index cca52665d9..ae0841fa3a 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -8,3 +8,4 @@ CONFIG_POWERNV=y # For pSeries CONFIG_PSERIES=y +CONFIG_NVDIMM=y diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 620fd4cb59..2ad052a536 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -8,4 +8,4 @@ config MEM_DEVICE config NVDIMM bool default y - depends on PC + depends on (PC || PSERIES) diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index a4bac57be6..c3d3cc56eb 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -7,7 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o -obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o +obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o spapr_nvdimm.o obj-$(CONFIG_SPAPR_RNG) += spapr_rng.o obj-$(call land,$(CONFIG_PSERIES),$(CONFIG_LINUX)) += spapr_pci_vfio.o spapr_pci_nvlink2.o # IBM PowerNV diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 691c391060..cb220fde45 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -80,6 +80,7 @@ #include "hw/ppc/spapr_cpu_core.h" #include "hw/mem/memory-device.h" #include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/spapr_nvdimm.h" #include "monitor/monitor.h" @@ -675,6 +676,14 @@ static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt, size = di->size; node = di->node; + /* + * The NVDIMM area is hotpluggable after the NVDIMM is unplugged. The + * area is marked hotpluggable in the next iteration for the bigger + * chunk including the NVDIMM occupied area. + */ + if (info->value->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) + continue; + /* Entry for hot-pluggable area */ if (cur_addr < addr) { drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size); @@ -1266,6 +1275,11 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) } } + /* NVDIMM devices */ + if (mc->nvdimm_supported) { + spapr_dt_persistent_memory(fdt); + } + return fdt; } @@ -2629,6 +2643,7 @@ static void spapr_machine_init(MachineState *machine) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; @@ -2861,6 +2876,10 @@ static void spapr_machine_init(MachineState *machine) "may run and log hardware error on the destination"); } + if (mc->nvdimm_supported) { + spapr_create_nvdimm_dr_connectors(spapr); + } + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); @@ -3430,7 +3449,8 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error *local_err = NULL; SpaprMachineState *ms = SPAPR_MACHINE(hotplug_dev); PCDIMMDevice *dimm = PC_DIMM(dev); - uint64_t size, addr; + uint64_t size, addr, slot; + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort); @@ -3439,14 +3459,24 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - addr = object_property_get_uint(OBJECT(dimm), - PC_DIMM_ADDR_PROP, &local_err); - if (local_err) { - goto out_unplug; + if (!is_nvdimm) { + addr = object_property_get_uint(OBJECT(dimm), + PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out_unplug; + } + spapr_add_lmbs(dev, addr, size, + spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), + &local_err); + } else { + slot = object_property_get_uint(OBJECT(dimm), + PC_DIMM_SLOT_PROP, &local_err); + if (local_err) { + goto out_unplug; + } + spapr_add_nvdimm(dev, slot, &local_err); } - spapr_add_lmbs(dev, addr, size, spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), - &local_err); if (local_err) { goto out_unplug; } @@ -3464,6 +3494,8 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev); SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + const MachineClass *mc = MACHINE_CLASS(smc); + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); PCDIMMDevice *dimm = PC_DIMM(dev); Error *local_err = NULL; uint64_t size; @@ -3475,16 +3507,27 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, return; } + if (is_nvdimm && !mc->nvdimm_supported) { + error_setg(errp, "NVDIMM hotplug not supported for this machine"); + return; + } + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err); if (local_err) { error_propagate(errp, local_err); return; } - if (size % SPAPR_MEMORY_BLOCK_SIZE) { + if (!is_nvdimm && size % SPAPR_MEMORY_BLOCK_SIZE) { error_setg(errp, "Hotplugged memory size must be a multiple of " - "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); + "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); return; + } else if (is_nvdimm) { + spapr_nvdimm_validate_opts(NVDIMM(dev), size, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, @@ -3624,6 +3667,12 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, int i; SpaprDrc *drc; + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(&local_err, + "nvdimm device hot unplug is not supported yet."); + goto out; + } + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort); nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; @@ -4418,6 +4467,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->update_dt_enabled = true; mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0"); mc->has_hotpluggable_cpus = true; + mc->nvdimm_supported = true; smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; @@ -4533,6 +4583,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF; + mc->nvdimm_supported = false; } DEFINE_SPAPR_MACHINE(4_2, "4.2", false); diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 17aeac3801..fc62e04901 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -22,6 +22,7 @@ #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ #include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ +#include "hw/ppc/spapr_nvdimm.h" #include "sysemu/device_tree.h" #include "sysemu/reset.h" #include "trace.h" @@ -709,6 +710,17 @@ static void spapr_drc_phb_class_init(ObjectClass *k, void *data) drck->dt_populate = spapr_phb_dt_populate; } +static void spapr_drc_pmem_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM; + drck->typename = "PMEM"; + drck->drc_name_prefix = "PMEM "; + drck->release = NULL; + drck->dt_populate = spapr_pmem_dt_populate; +} + static const TypeInfo spapr_dr_connector_info = { .name = TYPE_SPAPR_DR_CONNECTOR, .parent = TYPE_DEVICE, @@ -759,6 +771,12 @@ static const TypeInfo spapr_drc_phb_info = { .class_init = spapr_drc_phb_class_init, }; +static const TypeInfo spapr_drc_pmem_info = { + .name = TYPE_SPAPR_DRC_PMEM, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_pmem_class_init, +}; + /* helper functions for external users */ SpaprDrc *spapr_drc_by_index(uint32_t index) @@ -1230,6 +1248,7 @@ static void spapr_drc_register_types(void) type_register_static(&spapr_drc_pci_info); type_register_static(&spapr_drc_lmb_info); type_register_static(&spapr_drc_phb_info); + type_register_static(&spapr_drc_pmem_info); spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator", rtas_set_indicator); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 884e455f02..8b32b7eea5 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -196,6 +196,7 @@ struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_HP_TYPE_SLOT 3 #define RTAS_LOG_V6_HP_TYPE_PHB 4 #define RTAS_LOG_V6_HP_TYPE_PCI 5 +#define RTAS_LOG_V6_HP_TYPE_PMEM 6 uint8_t hotplug_action; #define RTAS_LOG_V6_HP_ACTION_ADD 1 #define RTAS_LOG_V6_HP_ACTION_REMOVE 2 @@ -631,6 +632,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, case SPAPR_DR_CONNECTOR_TYPE_PHB: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PHB; break; + case SPAPR_DR_CONNECTOR_TYPE_PMEM: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PMEM; + break; default: /* we shouldn't be signaling hotplug events for resources * that don't support them diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c new file mode 100644 index 0000000000..d03c8d3a5c --- /dev/null +++ b/hw/ppc/spapr_nvdimm.c @@ -0,0 +1,177 @@ +/* + * QEMU PAPR Storage Class Memory Interfaces + * + * Copyright (c) 2019-2020, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/ppc/spapr_drc.h" +#include "hw/ppc/spapr_nvdimm.h" +#include "hw/mem/nvdimm.h" +#include "qemu/nvdimm-utils.h" +#include "hw/ppc/fdt.h" + +void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, + Error **errp) +{ + char *uuidstr = NULL; + QemuUUID uuid; + + if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) { + error_setg(errp, "NVDIMM memory size excluding the label area" + " must be a multiple of %" PRIu64 "MB", + SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB); + return; + } + + uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP, NULL); + qemu_uuid_parse(uuidstr, &uuid); + g_free(uuidstr); + + if (qemu_uuid_is_null(&uuid)) { + error_setg(errp, "NVDIMM device requires the uuid to be set"); + return; + } +} + + +void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp) +{ + SpaprDrc *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + Error *local_err = NULL; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + spapr_drc_attach(drc, dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } +} + +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(drc->dev); + + *fdt_start_offset = spapr_dt_nvdimm(fdt, 0, nvdimm); + + return 0; +} + +void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr) +{ + MachineState *machine = MACHINE(spapr); + int i; + + for (i = 0; i < machine->ram_slots; i++) { + spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_PMEM, i); + } +} + + +int spapr_dt_nvdimm(void *fdt, int parent_offset, + NVDIMMDevice *nvdimm) +{ + int child_offset; + char *buf; + SpaprDrc *drc; + uint32_t drc_idx; + uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP, + &error_abort); + uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, + &error_abort); + uint32_t associativity[] = { + cpu_to_be32(0x4), /* length */ + cpu_to_be32(0x0), cpu_to_be32(0x0), + cpu_to_be32(0x0), cpu_to_be32(node) + }; + uint64_t lsize = nvdimm->label_size; + uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + NULL); + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + drc_idx = spapr_drc_index(drc); + + buf = g_strdup_printf("ibm,pmemory@%x", drc_idx); + child_offset = fdt_add_subnode(fdt, parent_offset, buf); + g_free(buf); + + _FDT(child_offset); + + _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx))); + _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); + _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); + + _FDT((fdt_setprop(fdt, child_offset, "ibm,associativity", associativity, + sizeof(associativity)))); + + buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); + g_free(buf); + + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx))); + + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size", + SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks", + size / SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize))); + + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application", + "operating-system"))); + _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0)); + + return child_offset; +} + +void spapr_dt_persistent_memory(void *fdt) +{ + int offset = fdt_subnode_offset(fdt, 0, "persistent-memory"); + GSList *iter, *nvdimms = nvdimm_get_device_list(); + + if (offset < 0) { + offset = fdt_add_subnode(fdt, 0, "persistent-memory"); + _FDT(offset); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "ibm,persistent-memory"))); + } + + /* Create DT entries for cold plugged NVDIMM devices */ + for (iter = nvdimms; iter; iter = iter->next) { + NVDIMMDevice *nvdimm = iter->data; + + spapr_dt_nvdimm(fdt, offset, nvdimm); + } + g_slist_free(nvdimms); + + return; +} diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 83f03cc577..df3d958a66 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -78,6 +78,13 @@ #define SPAPR_DRC_PHB(obj) OBJECT_CHECK(SpaprDrc, (obj), \ TYPE_SPAPR_DRC_PHB) +#define TYPE_SPAPR_DRC_PMEM "spapr-drc-pmem" +#define SPAPR_DRC_PMEM_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DRC_PMEM) +#define SPAPR_DRC_PMEM_CLASS(klass) \ + OBJECT_CLASS_CHECK(SpaprDrcClass, klass, TYPE_SPAPR_DRC_PMEM) +#define SPAPR_DRC_PMEM(obj) OBJECT_CHECK(SpaprDrc, (obj), \ + TYPE_SPAPR_DRC_PMEM) /* * Various hotplug types managed by SpaprDrc * @@ -95,6 +102,7 @@ typedef enum { SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO = 3, SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI = 4, SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB = 8, + SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM = 9, } SpaprDrcTypeShift; typedef enum { @@ -104,6 +112,7 @@ typedef enum { SPAPR_DR_CONNECTOR_TYPE_VIO = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO, SPAPR_DR_CONNECTOR_TYPE_PCI = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI, SPAPR_DR_CONNECTOR_TYPE_LMB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB, + SPAPR_DR_CONNECTOR_TYPE_PMEM = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM, } SpaprDrcType; /* diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h new file mode 100644 index 0000000000..b3330cc485 --- /dev/null +++ b/include/hw/ppc/spapr_nvdimm.h @@ -0,0 +1,37 @@ +/* + * QEMU PowerPC PAPR SCM backend definitions + * + * Copyright (c) 2020, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef HW_SPAPR_NVDIMM_H +#define HW_SPAPR_NVDIMM_H + +#include "hw/mem/nvdimm.h" +#include "hw/ppc/spapr.h" + +/* + * The nvdimm size should be aligned to SCM block size. + * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE + * inorder to have SCM regions not to overlap with dimm memory regions. + * The SCM devices can have variable block sizes. For now, fixing the + * block size to the minimum value. + */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp); +int spapr_dt_nvdimm(void *fdt, int parent_offset, NVDIMMDevice *nvdimm); +void spapr_dt_persistent_memory(void *fdt); +void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, + Error **errp); +void spapr_add_nvdimm(DeviceState *dev, uint64_t slot, Error **errp); +void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr); + +#endif From b5fca656f7011ca56fdfa93160c4296db9e3f4d9 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Sun, 9 Feb 2020 22:56:42 -0600 Subject: [PATCH 08/20] spapr: Add Hcalls to support PAPR NVDIMM device This patch implements few of the necessary hcalls for the nvdimm support. PAPR semantics is such that each NVDIMM device is comprising of multiple SCM(Storage Class Memory) blocks. The guest requests the hypervisor to bind each of the SCM blocks of the NVDIMM device using hcalls. There can be SCM block unbind requests in case of driver errors or unplug(not supported now) use cases. The NVDIMM label read/writes are done through hcalls. Since each virtual NVDIMM device is divided into multiple SCM blocks, the bind, unbind, and queries using hcalls on those blocks can come independently. This doesn't fit well into the qemu device semantics, where the map/unmap are done at the (whole)device/object level granularity. The patch doesnt actually bind/unbind on hcalls but let it happen at the device_add/del phase itself instead. The guest kernel makes bind/unbind requests for the virtual NVDIMM device at the region level granularity. Without interleaving, each virtual NVDIMM device is presented as a separate guest physical address range. So, there is no way a partial bind/unbind request can come for the vNVDIMM in a hcall for a subset of SCM blocks of a virtual NVDIMM. Hence it is safe to do bind/unbind everything during the device_add/del. Signed-off-by: Shivaprasad G Bhat Message-Id: <158131059899.2897.11515211602702956854.stgit@lep8c.aus.stglabs.ibm.com> Signed-off-by: David Gibson --- hw/ppc/spapr_nvdimm.c | 298 +++++++++++++++++++++++++++++++++++++++++ include/hw/ppc/spapr.h | 8 +- 2 files changed, 305 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index d03c8d3a5c..74eeb8bb74 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -28,6 +28,7 @@ #include "hw/mem/nvdimm.h" #include "qemu/nvdimm-utils.h" #include "hw/ppc/fdt.h" +#include "qemu/range.h" void spapr_nvdimm_validate_opts(NVDIMMDevice *nvdimm, uint64_t size, Error **errp) @@ -175,3 +176,300 @@ void spapr_dt_persistent_memory(void *fdt) return; } + +static target_ulong h_scm_read_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t len = args[2]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint64_t data = 0; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P3; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->read_label_data(nvdimm, buf, len, offset); + + switch (len) { + case 1: + data = ldub_p(buf); + break; + case 2: + data = lduw_be_p(buf); + break; + case 4: + data = ldl_be_p(buf); + break; + case 8: + data = ldq_be_p(buf); + break; + default: + g_assert_not_reached(); + } + + args[0] = data; + + return H_SUCCESS; +} + +static target_ulong h_scm_write_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t data = args[2]; + uint64_t len = args[3]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P4; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + switch (len) { + case 1: + if (data & 0xffffffffffffff00) { + return H_P2; + } + stb_p(buf, data); + break; + case 2: + if (data & 0xffffffffffff0000) { + return H_P2; + } + stw_be_p(buf, data); + break; + case 4: + if (data & 0xffffffff00000000) { + return H_P2; + } + stl_be_p(buf, data); + break; + case 8: + stq_be_p(buf, data); + break; + default: + g_assert_not_reached(); + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->write_label_data(nvdimm, buf, len, offset); + + return H_SUCCESS; +} + +static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t starting_idx = args[1]; + uint64_t no_of_scm_blocks_to_bind = args[2]; + uint64_t target_logical_mem_addr = args[3]; + uint64_t continue_token = args[4]; + uint64_t size; + uint64_t total_no_of_scm_blocks; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + hwaddr addr; + NVDIMMDevice *nvdimm; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + /* + * Currently continue token should be zero qemu has already bound + * everything and this hcall doesnt return H_BUSY. + */ + if (continue_token > 0) { + return H_P5; + } + + /* Currently qemu assigns the address. */ + if (target_logical_mem_addr != 0xffffffffffffffff) { + return H_OVERLAP; + } + + nvdimm = NVDIMM(drc->dev); + + size = object_property_get_uint(OBJECT(nvdimm), + PC_DIMM_SIZE_PROP, &error_abort); + + total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + + if (starting_idx > total_no_of_scm_blocks) { + return H_P2; + } + + if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) || + ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) { + return H_P3; + } + + addr = object_property_get_uint(OBJECT(nvdimm), + PC_DIMM_ADDR_PROP, &error_abort); + + addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE; + + /* Already bound, Return target logical address in R5 */ + args[1] = addr; + args[2] = no_of_scm_blocks_to_bind; + + return H_SUCCESS; +} + +static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t starting_scm_logical_addr = args[1]; + uint64_t no_of_scm_blocks_to_unbind = args[2]; + uint64_t continue_token = args[3]; + uint64_t size_to_unbind; + Range blockrange = range_empty; + Range nvdimmrange = range_empty; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + uint64_t size, addr; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + /* continue_token should be zero as this hcall doesn't return H_BUSY. */ + if (continue_token > 0) { + return H_P4; + } + + /* Check if starting_scm_logical_addr is block aligned */ + if (!QEMU_IS_ALIGNED(starting_scm_logical_addr, + SPAPR_MINIMUM_SCM_BLOCK_SIZE)) { + return H_P2; + } + + size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE; + if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind != + size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) { + return H_P3; + } + + nvdimm = NVDIMM(drc->dev); + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP, + &error_abort); + + range_init_nofail(&nvdimmrange, addr, size); + range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind); + + if (!range_contains_range(&nvdimmrange, &blockrange)) { + return H_P3; + } + + args[1] = no_of_scm_blocks_to_unbind; + + /* let unplug take care of actual unbind */ + return H_SUCCESS; +} + +#define H_UNBIND_SCOPE_ALL 0x1 +#define H_UNBIND_SCOPE_DRC 0x2 + +static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + uint64_t target_scope = args[0]; + uint32_t drc_index = args[1]; + uint64_t continue_token = args[2]; + NVDIMMDevice *nvdimm; + uint64_t size; + uint64_t no_of_scm_blocks_unbound = 0; + + /* continue_token should be zero as this hcall doesn't return H_BUSY. */ + if (continue_token > 0) { + return H_P4; + } + + if (target_scope == H_UNBIND_SCOPE_DRC) { + SpaprDrc *drc = spapr_drc_by_index(drc_index); + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_P2; + } + + nvdimm = NVDIMM(drc->dev); + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } else if (target_scope == H_UNBIND_SCOPE_ALL) { + GSList *list, *nvdimms; + + nvdimms = nvdimm_get_device_list(); + for (list = nvdimms; list; list = list->next) { + nvdimm = list->data; + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } + g_slist_free(nvdimms); + } else { + return H_PARAMETER; + } + + args[1] = no_of_scm_blocks_unbound; + + /* let unplug take care of actual unbind */ + return H_SUCCESS; +} + +static void spapr_scm_register_types(void) +{ + /* qemu/scm specific hcalls */ + spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata); + spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata); + spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); + spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); + spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); +} + +type_init(spapr_scm_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index a1fba95c82..d557fc1f35 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -300,6 +300,7 @@ struct SpaprMachineState { #define H_P7 -60 #define H_P8 -61 #define H_P9 -62 +#define H_OVERLAP -68 #define H_UNSUPPORTED_FLAG -256 #define H_MULTI_THREADS_ACTIVE -9005 @@ -507,8 +508,13 @@ struct SpaprMachineState { #define H_INT_ESB 0x3C8 #define H_INT_SYNC 0x3CC #define H_INT_RESET 0x3D0 +#define H_SCM_READ_METADATA 0x3E4 +#define H_SCM_WRITE_METADATA 0x3E8 +#define H_SCM_BIND_MEM 0x3EC +#define H_SCM_UNBIND_MEM 0x3F0 +#define H_SCM_UNBIND_ALL 0x3FC -#define MAX_HCALL_OPCODE H_INT_RESET +#define MAX_HCALL_OPCODE H_SCM_UNBIND_ALL /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. From 19b5c4186eccc519e6906838b288b2e206d43efe Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Wed, 12 Feb 2020 23:26:14 +0100 Subject: [PATCH 09/20] target/ppc/cpu.h: Remove duplicate includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 74433bf083b added some includes but added them twice. Since these are guarded against multiple inclusion including them once is enough. Signed-off-by: BALATON Zoltan Message-Id: <20200212223207.5A37574637F@zero.eik.bme.hu> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: David Gibson --- target/ppc/cpu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 3a1eb76004..07dd2b4da7 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -23,8 +23,6 @@ #include "qemu/int128.h" #include "exec/cpu-defs.h" #include "cpu-qom.h" -#include "exec/cpu-defs.h" -#include "cpu-qom.h" /* #define PPC_EMULATE_32BITS_HYPV */ From 5a205fcf7700abe442b374d76a8c4edea92eecc7 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 12 Feb 2020 19:54:00 +0100 Subject: [PATCH 10/20] pnv/phb3: Convert 1u to 1ull As reported by Coverity defect CID 1419397, the 'j' variable goes up to 63 and shouldn't be used to left shift a 32-bit integer. The result of the operation goes to a 64-bit integer : use a 64-bit constant. Reported-by: Coverity CID 1419397 Bad bit shift operation Fixes: 9ae1329ee2fe "ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge" Signed-off-by: Greg Kurz Message-Id: <158153364010.3229002.8004283672455615950.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/pci-host/pnv_phb3_msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c index ecfc1b2c4e..d645468f4a 100644 --- a/hw/pci-host/pnv_phb3_msi.c +++ b/hw/pci-host/pnv_phb3_msi.c @@ -220,7 +220,7 @@ static void phb3_msi_resend(ICSState *ics) if ((msi->rba[i] & (1ull << j)) == 0) { continue; } - msi->rba[i] &= ~(1u << j); + msi->rba[i] &= ~(1ull << j); phb3_msi_try_send(msi, i * 64 + j, true); } } From 7cfb999fe9c80c72f3f76043ce1bf8dee0c020af Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 12 Feb 2020 19:54:06 +0100 Subject: [PATCH 11/20] pnv/phb4: Fix error path in pnv_pec_realize() Obviously, we want to pass &local_err so that we can check it then line below, not errp. Reported-by: Coverity CID 1419395 'Constant' variable guards dead code Fixes: 4f9924c4d4cf "ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge" Signed-off-by: Greg Kurz Message-Id: <158153364605.3229002.2796177658957390343.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/pci-host/pnv_phb4_pec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 68e1db3eac..911d147ffd 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -391,7 +391,7 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp) object_property_set_int(stk_obj, i, "stack-no", &error_abort); object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort); - object_property_set_bool(stk_obj, true, "realized", errp); + object_property_set_bool(stk_obj, true, "realized", &local_err); if (local_err) { error_propagate(errp, local_err); return; From e8ead7d579645a1866813ce563882cad055088b9 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 12 Feb 2020 19:54:12 +0100 Subject: [PATCH 12/20] pnv/phb3: Add missing break statement We obviously don't want to print out an error message if addr points to a valid register. Reported-by: Coverity CID 1419391 Missing break in switch Fixes: 9ae1329ee2fe "ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge" Signed-off-by: Greg Kurz Message-Id: <158153365202.3229002.11521084761048102466.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/pci-host/pnv_phb3_pbcq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c index f232228b0e..7b9a121246 100644 --- a/hw/pci-host/pnv_phb3_pbcq.c +++ b/hw/pci-host/pnv_phb3_pbcq.c @@ -173,6 +173,7 @@ static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr, case PBCQ_PCI_BAR2: pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull; pnv_pbcq_update_map(pbcq); + break; default: phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__, addr, val); From 87262806cb48c247ae55d99e280ba96027648c2d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 3 Feb 2020 14:29:42 +1100 Subject: [PATCH 13/20] spapr: Allow changing offset for -kernel image This allows moving the kernel in the guest memory. The option is useful for step debugging (as Linux is linked at 0x0); it also allows loading grub which is normally linked to run at 0x20000. This uses the existing kernel address by default. Signed-off-by: Alexey Kardashevskiy Message-Id: <20200203032943.121178-6-aik@ozlabs.ru> Reviewed-by: Fabiano Rosas Signed-off-by: David Gibson --- hw/ppc/spapr.c | 38 +++++++++++++++++++++++++++++++------- include/hw/ppc/spapr.h | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index cb220fde45..828e2cc135 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1064,7 +1064,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt) } if (spapr->kernel_size) { - uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), + uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr), cpu_to_be64(spapr->kernel_size) }; _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel", @@ -1252,7 +1252,8 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) /* Build memory reserve map */ if (reset) { if (spapr->kernel_size) { - _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); + _FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr, + spapr->kernel_size))); } if (spapr->initrd_size) { _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, @@ -1285,7 +1286,9 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) static uint64_t translate_kernel_address(void *opaque, uint64_t addr) { - return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; + SpaprMachineState *spapr = opaque; + + return (addr & 0x0fffffff) + spapr->kernel_addr; } static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, @@ -2967,14 +2970,15 @@ static void spapr_machine_init(MachineState *machine) uint64_t lowaddr = 0; spapr->kernel_size = load_elf(kernel_filename, NULL, - translate_kernel_address, NULL, + translate_kernel_address, spapr, NULL, &lowaddr, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { spapr->kernel_size = load_elf(kernel_filename, NULL, - translate_kernel_address, NULL, NULL, + translate_kernel_address, spapr, NULL, &lowaddr, NULL, NULL, 0, - PPC_ELF_MACHINE, 0, 0); + PPC_ELF_MACHINE, + 0, 0); spapr->kernel_le = spapr->kernel_size > 0; } if (spapr->kernel_size < 0) { @@ -2988,7 +2992,7 @@ static void spapr_machine_init(MachineState *machine) /* Try to locate the initrd in the gap between the kernel * and the firmware. Add a bit of space just in case */ - spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size + spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size + 0x1ffff) & ~0xffff; spapr->initrd_size = load_image_targphys(initrd_filename, spapr->initrd_base, @@ -3234,6 +3238,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name, visit_type_uint32(v, name, (uint32_t *)opaque, errp); } +static void spapr_get_kernel_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, (uint64_t *)opaque, errp); +} + +static void spapr_set_kernel_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, (uint64_t *)opaque, errp); +} + static char *spapr_get_ic_mode(Object *obj, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); @@ -3339,6 +3355,14 @@ static void spapr_instance_init(Object *obj) object_property_add_bool(obj, "vfio-no-msix-emulation", spapr_get_msix_emulation, NULL, NULL); + object_property_add(obj, "kernel-addr", "uint64", spapr_get_kernel_addr, + spapr_set_kernel_addr, NULL, &spapr->kernel_addr, + &error_abort); + object_property_set_description(obj, "kernel-addr", + stringify(KERNEL_LOAD_ADDR) + " for -kernel is the default", + NULL); + spapr->kernel_addr = KERNEL_LOAD_ADDR; /* The machine class defines the default interrupt controller mode */ spapr->irq = smc->irq; object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index d557fc1f35..09110961a5 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -162,6 +162,7 @@ struct SpaprMachineState { void *fdt_blob; long kernel_size; bool kernel_le; + uint64_t kernel_addr; uint32_t initrd_base; long initrd_size; uint64_t rtc_offset; /* Now used only during incoming migration */ From 92eeb004e8832d0020be056eff722702eafde4dc Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Fri, 14 Feb 2020 00:57:34 +0100 Subject: [PATCH 14/20] target/ppc: Fix typo in comments "Deferred" was misspelled as "differed" in some comments, correct this typo, Signed-off-by: BALATON Zoltan Message-Id: <20200214155748.0896B745953@zero.eik.bme.hu> Signed-off-by: David Gibson --- target/ppc/fpu_helper.c | 4 ++-- target/ppc/translate/fp-impl.inc.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index dc383242f7..ae43b08eb5 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -293,7 +293,7 @@ static void float_invalid_op_vxvc(CPUPPCState *env, bool set_fpcc, env->error_code = POWERPC_EXCP_FP | POWERPC_EXCP_FP_VXVC; /* Update the floating-point enabled exception summary */ env->fpscr |= FP_FEX; - /* Exception is differed */ + /* Exception is deferred */ } } @@ -644,7 +644,7 @@ static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) if (cs->exception_index == POWERPC_EXCP_PROGRAM && (env->error_code & POWERPC_EXCP_FP)) { - /* Differred floating-point exception after target FPR update */ + /* Deferred floating-point exception after target FPR update */ if (fp_exceptions_enabled(env)) { raise_exception_err_ra(env, cs->exception_index, env->error_code, raddr); diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c index d8e27bf4d5..9f7868ee28 100644 --- a/target/ppc/translate/fp-impl.inc.c +++ b/target/ppc/translate/fp-impl.inc.c @@ -781,7 +781,7 @@ static void gen_mtfsb1(DisasContext *ctx) tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } - /* We can raise a differed exception */ + /* We can raise a deferred exception */ gen_helper_float_check_status(cpu_env); } @@ -817,7 +817,7 @@ static void gen_mtfsf(DisasContext *ctx) tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } - /* We can raise a differed exception */ + /* We can raise a deferred exception */ gen_helper_float_check_status(cpu_env); tcg_temp_free_i64(t1); } @@ -850,7 +850,7 @@ static void gen_mtfsfi(DisasContext *ctx) tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } - /* We can raise a differed exception */ + /* We can raise a deferred exception */ gen_helper_float_check_status(cpu_env); } From b561615db2fcdb2c3710cf0a0b2757b56bda922e Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Sun, 16 Feb 2020 22:23:54 +0100 Subject: [PATCH 15/20] target/ppc/cpu.h: Move fpu related members closer in cpu env Move fp_status and fpscr closer to other floating point and vector related members in cpu env definition so they are in one group. Signed-off-by: BALATON Zoltan Message-Id: <5b50e9e7eec2c383ae878b397d0b2927efc9ea43.1581888834.git.balaton@eik.bme.hu> Signed-off-by: David Gibson --- target/ppc/cpu.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 07dd2b4da7..c3b0a00064 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -997,11 +997,6 @@ struct CPUPPCState { /* temporary general purpose registers */ target_ulong tgpr[4]; /* Used to speed-up TLB assist handlers */ - /* Floating point execution context */ - float_status fp_status; - /* floating point status and control register */ - target_ulong fpscr; - /* Next instruction pointer */ target_ulong nip; @@ -1060,6 +1055,10 @@ struct CPUPPCState { * used simultaneously */ float_status vec_status; + /* Floating point execution context */ + float_status fp_status; + /* floating point status and control register */ + target_ulong fpscr; /* Internal devices resources */ /* Time base and decrementer */ From ad5db2e732c0ed532fe852a2ccf15ab637cb2366 Mon Sep 17 00:00:00 2001 From: BALATON Zoltan Date: Sun, 16 Feb 2020 22:23:54 +0100 Subject: [PATCH 16/20] target/ppc/cpu.h: Clean up comments in the struct CPUPPCState definition The cpu env struct is quite complex but comments supposed to explain it in its definition just make it harder to read. Reformat and reword some comments to make it clearer and more readable. Signed-off-by: BALATON Zoltan Message-Id: <8707144ab1ccf9c5c89a39c2d7a0b02307ca25d4.1581888834.git.balaton@eik.bme.hu> Signed-off-by: David Gibson --- target/ppc/cpu.h | 145 ++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 91 deletions(-) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c3b0a00064..b283042515 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -960,116 +960,88 @@ struct ppc_radix_page_info { #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 struct CPUPPCState { - /* - * First are the most commonly used resources during translated - * code execution - */ - /* general purpose registers */ - target_ulong gpr[32]; - /* Storage for GPR MSB, used by the SPE extension */ - target_ulong gprh[32]; - /* LR */ + /* Most commonly used resources during translated code execution first */ + target_ulong gpr[32]; /* general purpose registers */ + target_ulong gprh[32]; /* storage for GPR MSB, used by the SPE extension */ target_ulong lr; - /* CTR */ target_ulong ctr; - /* condition register */ - uint32_t crf[8]; + uint32_t crf[8]; /* condition register */ #if defined(TARGET_PPC64) - /* CFAR */ target_ulong cfar; #endif - /* XER (with SO, OV, CA split out) */ - target_ulong xer; + target_ulong xer; /* XER (with SO, OV, CA split out) */ target_ulong so; target_ulong ov; target_ulong ca; target_ulong ov32; target_ulong ca32; - /* Reservation address */ - target_ulong reserve_addr; - /* Reservation value */ - target_ulong reserve_val; + + target_ulong reserve_addr; /* Reservation address */ + target_ulong reserve_val; /* Reservation value */ target_ulong reserve_val2; - /* Those ones are used in supervisor mode only */ - /* machine state register */ - target_ulong msr; - /* temporary general purpose registers */ - target_ulong tgpr[4]; /* Used to speed-up TLB assist handlers */ + /* These are used in supervisor mode only */ + target_ulong msr; /* machine state register */ + target_ulong tgpr[4]; /* temporary general purpose registers, */ + /* used to speed-up TLB assist handlers */ - /* Next instruction pointer */ - target_ulong nip; - - /* High part of 128-bit helper return. */ - uint64_t retxh; + target_ulong nip; /* next instruction pointer */ + uint64_t retxh; /* high part of 128-bit helper return */ /* when a memory exception occurs, the access type is stored here */ int access_type; - /* MMU context - only relevant for full system emulation */ #if !defined(CONFIG_USER_ONLY) + /* MMU context, only relevant for full system emulation */ #if defined(TARGET_PPC64) - /* PowerPC 64 SLB area */ - ppc_slb_t slb[MAX_SLB_ENTRIES]; - /* tcg TLB needs flush (deferred slb inval instruction typically) */ + ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ #endif - /* segment registers */ - target_ulong sr[32]; - /* BATs */ - uint32_t nb_BATs; + target_ulong sr[32]; /* segment registers */ + uint32_t nb_BATs; /* number of BATs */ target_ulong DBAT[2][8]; target_ulong IBAT[2][8]; /* PowerPC TLB registers (for 4xx, e500 and 60x software driven TLBs) */ - int32_t nb_tlb; /* Total number of TLB */ + int32_t nb_tlb; /* Total number of TLB */ int tlb_per_way; /* Speed-up helper: used to avoid divisions at run time */ - int nb_ways; /* Number of ways in the TLB set */ - int last_way; /* Last used way used to allocate TLB in a LRU way */ + int nb_ways; /* Number of ways in the TLB set */ + int last_way; /* Last used way used to allocate TLB in a LRU way */ int id_tlbs; /* If 1, MMU has separated TLBs for instructions & data */ - int nb_pids; /* Number of available PID registers */ - int tlb_type; /* Type of TLB we're dealing with */ - ppc_tlb_t tlb; /* TLB is optional. Allocate them only if needed */ - /* 403 dedicated access protection registers */ - target_ulong pb[4]; - bool tlb_dirty; /* Set to non-zero when modifying TLB */ - bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ + int nb_pids; /* Number of available PID registers */ + int tlb_type; /* Type of TLB we're dealing with */ + ppc_tlb_t tlb; /* TLB is optional. Allocate them only if needed */ + target_ulong pb[4]; /* 403 dedicated access protection registers */ + bool tlb_dirty; /* Set to non-zero when modifying TLB */ + bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ uint32_t tlb_need_flush; /* Delayed flush needed */ #define TLB_NEED_LOCAL_FLUSH 0x1 #define TLB_NEED_GLOBAL_FLUSH 0x2 #endif /* Other registers */ - /* Special purpose registers */ - target_ulong spr[1024]; + target_ulong spr[1024]; /* special purpose registers */ ppc_spr_t spr_cb[1024]; - /* Vector status and control register, minus VSCR_SAT. */ + /* Vector status and control register, minus VSCR_SAT */ uint32_t vscr; /* VSX registers (including FP and AVR) */ ppc_vsr_t vsr[64] QEMU_ALIGNED(16); - /* Non-zero if and only if VSCR_SAT should be set. */ + /* Non-zero if and only if VSCR_SAT should be set */ ppc_vsr_t vscr_sat QEMU_ALIGNED(16); /* SPE registers */ uint64_t spe_acc; uint32_t spe_fscr; - /* - * SPE and Altivec can share a status since they will never be - * used simultaneously - */ + /* SPE and Altivec share status as they'll never be used simultaneously */ float_status vec_status; - /* Floating point execution context */ - float_status fp_status; - /* floating point status and control register */ - target_ulong fpscr; + float_status fp_status; /* Floating point execution context */ + target_ulong fpscr; /* Floating point status and control register */ /* Internal devices resources */ - /* Time base and decrementer */ - ppc_tb_t *tb_env; - /* Device control registers */ - ppc_dcr_t *dcr_env; + ppc_tb_t *tb_env; /* Time base and decrementer */ + ppc_dcr_t *dcr_env; /* Device control registers */ int dcache_line_size; int icache_line_size; - /* Those resources are used during exception processing */ + /* These resources are used during exception processing */ /* CPU model definition */ target_ulong msr_mask; powerpc_mmu_t mmu_model; @@ -1088,58 +1060,49 @@ struct CPUPPCState { uint32_t pending_interrupts; #if !defined(CONFIG_USER_ONLY) /* - * This is the IRQ controller, which is implementation dependent - * and only relevant when emulating a complete machine. Note that - * this isn't used by recent Book3s compatible CPUs (POWER7 and - * newer). + * This is the IRQ controller, which is implementation dependent and only + * relevant when emulating a complete machine. Note that this isn't used + * by recent Book3s compatible CPUs (POWER7 and newer). */ uint32_t irq_input_state; void **irq_inputs; - /* Exception vectors */ - target_ulong excp_vectors[POWERPC_EXCP_NB]; + + target_ulong excp_vectors[POWERPC_EXCP_NB]; /* Exception vectors */ target_ulong excp_prefix; target_ulong ivor_mask; target_ulong ivpr_mask; target_ulong hreset_vector; hwaddr mpic_iack; - /* true when the external proxy facility mode is enabled */ - bool mpic_proxy; + bool mpic_proxy; /* true if the external proxy facility mode is enabled */ + bool has_hv_mode; /* set when the processor has an HV mode, thus HV priv */ + /* instructions and SPRs are diallowed if MSR:HV is 0 */ /* - * set when the processor has an HV mode, thus HV priv - * instructions and SPRs are diallowed if MSR:HV is 0 - */ - bool has_hv_mode; - - /* - * On P7/P8/P9, set when in PM state, we need to handle resume in - * a special way (such as routing some resume causes to 0x100, ie, - * sreset), so flag this here. + * On P7/P8/P9, set when in PM state so we need to handle resume in a + * special way (such as routing some resume causes to 0x100, i.e. sreset). */ bool resume_as_sreset; #endif - /* Those resources are used only in QEMU core */ - target_ulong hflags; /* hflags is a MSR & HFLAGS_MASK */ + /* These resources are used only in QEMU core */ + target_ulong hflags; /* hflags is MSR & HFLAGS_MASK */ target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */ - int immu_idx; /* precomputed MMU index to speed up insn access */ - int dmmu_idx; /* precomputed MMU index to speed up data accesses */ + int immu_idx; /* precomputed MMU index to speed up insn accesses */ + int dmmu_idx; /* precomputed MMU index to speed up data accesses */ /* Power management */ int (*check_pow)(CPUPPCState *env); #if !defined(CONFIG_USER_ONLY) - void *load_info; /* Holds boot loading state. */ + void *load_info; /* holds boot loading state */ #endif /* booke timers */ /* - * Specifies bit locations of the Time Base used to signal a fixed - * timer exception on a transition from 0 to 1. (watchdog or - * fixed-interval timer) + * Specifies bit locations of the Time Base used to signal a fixed timer + * exception on a transition from 0 to 1 (watchdog or fixed-interval timer) * - * 0 selects the least significant bit. - * 63 selects the most significant bit. + * 0 selects the least significant bit, 63 selects the most significant bit */ uint8_t fit_period[4]; uint8_t wdt_period[4]; From b2fb7a4368ba99173c9dbf084aa5a7f04cc8bb8e Mon Sep 17 00:00:00 2001 From: Pan Nengyuan Date: Fri, 14 Feb 2020 11:32:06 +0800 Subject: [PATCH 17/20] ppc: free 'fdt' after reset the machine 'fdt' forgot to clean both e500 and pnv when we call 'system_reset' on ppc, this patch fix it. The leak stacks are as follow: Direct leak of 4194304 byte(s) in 4 object(s) allocated from: #0 0x7fafe37dd970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) #1 0x7fafe2e3149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) #2 0x561876f7f80d in create_device_tree /mnt/sdb/qemu-new/qemu/device_tree.c:40 #3 0x561876b7ac29 in ppce500_load_device_tree /mnt/sdb/qemu-new/qemu/hw/ppc/e500.c:364 #4 0x561876b7f437 in ppce500_reset_device_tree /mnt/sdb/qemu-new/qemu/hw/ppc/e500.c:617 #5 0x56187718b1ae in qemu_devices_reset /mnt/sdb/qemu-new/qemu/hw/core/reset.c:69 #6 0x561876f6938d in qemu_system_reset /mnt/sdb/qemu-new/qemu/vl.c:1412 #7 0x561876f6a25b in main_loop_should_exit /mnt/sdb/qemu-new/qemu/vl.c:1645 #8 0x561876f6a398 in main_loop /mnt/sdb/qemu-new/qemu/vl.c:1679 #9 0x561876f7da8e in main /mnt/sdb/qemu-new/qemu/vl.c:4438 #10 0x7fafde16b812 in __libc_start_main ../csu/libc-start.c:308 #11 0x5618765c055d in _start (/mnt/sdb/qemu-new/qemu/build/ppc64-softmmu/qemu-system-ppc64+0x2b1555d) Direct leak of 1048576 byte(s) in 1 object(s) allocated from: #0 0x7fc0a6f1b970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) #1 0x7fc0a656f49d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) #2 0x55eb05acd2ca in pnv_dt_create /mnt/sdb/qemu-new/qemu/hw/ppc/pnv.c:507 #3 0x55eb05ace5bf in pnv_reset /mnt/sdb/qemu-new/qemu/hw/ppc/pnv.c:578 #4 0x55eb05f2f395 in qemu_system_reset /mnt/sdb/qemu-new/qemu/vl.c:1410 #5 0x55eb05f43850 in main /mnt/sdb/qemu-new/qemu/vl.c:4403 #6 0x7fc0a18a9812 in __libc_start_main ../csu/libc-start.c:308 #7 0x55eb0558655d in _start (/mnt/sdb/qemu-new/qemu/build/ppc64-softmmu/qemu-system-ppc64+0x2b1555d) Reported-by: Euler Robot Signed-off-by: Pan Nengyuan Message-Id: <20200214033206.4395-1-pannengyuan@huawei.com> Reviewed-by: Greg Kurz Signed-off-by: David Gibson --- hw/ppc/e500.c | 1 + hw/ppc/pnv.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 886442e54f..af537bba2b 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -594,6 +594,7 @@ done: cpu_physical_memory_write(addr, fdt, fdt_size); } ret = fdt_size; + g_free(fdt); out: g_free(pci_map); diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 139c857b1e..e98038b809 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -582,6 +582,8 @@ static void pnv_reset(MachineState *machine) qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); + + g_free(fdt); } static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp) From 4b63db1289a9e597bc151fa5e4d72f882cb6de1e Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 14 Feb 2020 16:01:22 +0100 Subject: [PATCH 18/20] spapr: Don't use spapr_drc_needed() in CAS code We currently don't support hotplug of devices between boot and CAS. If this happens a CAS reboot is triggered. We detect this during CAS using the spapr_drc_needed() function which is essentially a VMStateDescription .needed callback. Even if the condition for CAS reboot happens to be the same as for DRC migration, it looks wrong to piggyback a migration helper for this. Introduce a helper with slightly more explicit name and use it in both CAS and DRC migration code. Since a subsequent patch will enhance this helper to cover the case of hot unplug, let's go for spapr_drc_transient(). While here convert spapr_hotplugged_dev_before_cas() to the "transient" wording as well. This doesn't change any behaviour. Signed-off-by: Greg Kurz Message-Id: <158169248180.3465937.9531405453362718771.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 20 ++++++++++++++------ hw/ppc/spapr_hcall.c | 14 +++++++++----- include/hw/ppc/spapr_drc.h | 4 +++- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index fc62e04901..4c35ce7c5c 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -456,23 +456,31 @@ void spapr_drc_reset(SpaprDrc *drc) } } -bool spapr_drc_needed(void *opaque) +bool spapr_drc_transient(SpaprDrc *drc) { - SpaprDrc *drc = (SpaprDrc *)opaque; SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - /* If no dev is plugged in there is no need to migrate the DRC state */ + /* + * If no dev is plugged in there is no need to migrate the DRC state + * nor to reset the DRC at CAS. + */ if (!drc->dev) { return false; } /* - * We need to migrate the state if it's not equal to the expected - * long-term state, which is the same as the coldplugged initial - * state */ + * We need to reset the DRC at CAS or to migrate the DRC state if it's + * not equal to the expected long-term state, which is the same as the + * coldplugged initial state. + */ return (drc->state != drck->ready_state); } +static bool spapr_drc_needed(void *opaque) +{ + return spapr_drc_transient(opaque); +} + static const VMStateDescription vmstate_spapr_drc = { .name = "spapr_drc", .version_id = 1, diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b8bb66b5c0..6db3dbde9c 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1640,20 +1640,24 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu, return best_compat; } -static bool spapr_hotplugged_dev_before_cas(void) +static bool spapr_transient_dev_before_cas(void) { - Object *drc_container, *obj; + Object *drc_container; ObjectProperty *prop; ObjectPropertyIterator iter; drc_container = container_get(object_get_root(), "/dr-connector"); object_property_iter_init(&iter, drc_container); while ((prop = object_property_iter_next(&iter))) { + SpaprDrc *drc; + if (!strstart(prop->type, "link<", NULL)) { continue; } - obj = object_property_get_link(drc_container, prop->name, NULL); - if (spapr_drc_needed(obj)) { + drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container, + prop->name, NULL)); + + if (spapr_drc_transient(drc)) { return true; } } @@ -1830,7 +1834,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, spapr_irq_update_active_intc(spapr); - if (spapr_hotplugged_dev_before_cas()) { + if (spapr_transient_dev_before_cas()) { spapr->cas_reboot = true; } diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index df3d958a66..21af8deac1 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -278,7 +278,9 @@ int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask); void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp); void spapr_drc_detach(SpaprDrc *drc); -bool spapr_drc_needed(void *opaque); + +/* Returns true if a hot plug/unplug request is pending */ +bool spapr_drc_transient(SpaprDrc *drc); static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) { From ab8584349c476f9818dc6403359c85f9ab0ad5eb Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 14 Feb 2020 16:01:28 +0100 Subject: [PATCH 19/20] spapr: Fix handling of unplugged devices during CAS and migration We already detect if a device is being hot plugged before CAS to trigger a CAS reboot and during migration to migrate the state of the associated DRC. But hot unplugging a device is also an asynchronous operation that requires the guest to take action. This means that if the guest is migrated after the hot unplug event was sent but before it could release the device with RTAS, the destination QEMU doesn't know about the pending unplug operation and doesn't actually remove the device when the guest finally releases it. Similarly, if the unplug request is fired before CAS, the guest isn't notified of the change, just like with hotplug. It ends up booting with the device still present in the DT and configures it, just like it was never removed. Even weirder, since the event is still queued, it will be eventually processed when some other unrelated event is posted to the guest. Enhance spapr_drc_transient() to also return true if an unplug request is pending. This fixes the issue at CAS with a CAS reboot request and causes the DRC state to be migrated. Some extra care is still needed to inform the destination that an unplug request is pending : migrate the unplug_requested field of the DRC in an optional subsection. This might break backwards migration, but this is still better than ending with an inconsistent guest. Signed-off-by: Greg Kurz Message-Id: <158169248798.3465937.1108351365840514270.stgit@bahia.lan> Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 4c35ce7c5c..e373d342eb 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -456,6 +456,22 @@ void spapr_drc_reset(SpaprDrc *drc) } } +static bool spapr_drc_unplug_requested_needed(void *opaque) +{ + return spapr_drc_unplug_requested(opaque); +} + +static const VMStateDescription vmstate_spapr_drc_unplug_requested = { + .name = "spapr_drc/unplug_requested", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_unplug_requested_needed, + .fields = (VMStateField []) { + VMSTATE_BOOL(unplug_requested, SpaprDrc), + VMSTATE_END_OF_LIST() + } +}; + bool spapr_drc_transient(SpaprDrc *drc) { SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -471,9 +487,10 @@ bool spapr_drc_transient(SpaprDrc *drc) /* * We need to reset the DRC at CAS or to migrate the DRC state if it's * not equal to the expected long-term state, which is the same as the - * coldplugged initial state. + * coldplugged initial state, or if an unplug request is pending. */ - return (drc->state != drck->ready_state); + return drc->state != drck->ready_state || + spapr_drc_unplug_requested(drc); } static bool spapr_drc_needed(void *opaque) @@ -489,6 +506,10 @@ static const VMStateDescription vmstate_spapr_drc = { .fields = (VMStateField []) { VMSTATE_UINT32(state, SpaprDrc), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_spapr_drc_unplug_requested, + NULL } }; From 438bafcac55308eef4f9029c94dbadd2c7ac3bb7 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Tue, 18 Feb 2020 17:11:53 +0800 Subject: [PATCH 20/20] hw/ppc/virtex_ml507:fix leak of fdevice tree blob The device tree blob returned by load_device_tree is malloced. We should free it after cpu_physical_memory_write(). Reported-by: Euler Robot Signed-off-by: Chen Qun Message-Id: <20200218091154.21696-3-kuhn.chenqun@huawei.com> Signed-off-by: David Gibson --- hw/ppc/virtex_ml507.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 91dd00ee91..4eef70069f 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -188,6 +188,7 @@ static int xilinx_load_device_tree(hwaddr addr, if (r < 0) fprintf(stderr, "couldn't set /chosen/bootargs\n"); cpu_physical_memory_write(addr, fdt, fdt_size); + g_free(fdt); return fdt_size; }