From 8c8f62baa61b55b297453a8d416c6b0932398948 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 21 Jul 2025 10:07:52 +0100 Subject: [PATCH 01/20] hvf: arm: Remove $pc from trace_hvf_data_abort() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't synchronize vcpu registers from the hardware accelerator (e.g., by cpu_synchronize_state()) in the Dabort handler, so env->pc points to the instruction which has nothing to do with the Dabort at all. And it doesn't seem to make much sense to log PC in every Dabort handler, let's just remove it from this trace event. Signed-off-by: Zenghui Yu Reviewed-by: Mads Ynddal Message-id: 20250713154719.4248-1-zenghui.yu@linux.dev Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Peter Maydell --- target/arm/hvf/hvf.c | 2 +- target/arm/hvf/trace-events | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index c9cfcdc08b..8f93e42b34 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -2005,7 +2005,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t cm = (syndrome >> 8) & 0x1; uint64_t val = 0; - trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address, + trace_hvf_data_abort(hvf_exit->exception.virtual_address, hvf_exit->exception.physical_address, isv, iswrite, s1ptw, len, srt); diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events index b49746f28d..b29a995f3d 100644 --- a/target/arm/hvf/trace-events +++ b/target/arm/hvf/trace-events @@ -2,7 +2,7 @@ hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)" hvf_inject_fiq(void) "injecting FIQ" hvf_inject_irq(void) "injecting IRQ" -hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" +hvf_data_abort(uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]" hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64 hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")" hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64 From 655659a74a36b63e33d2dc969d3c44beb1b008b3 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 21 Jul 2025 10:07:52 +0100 Subject: [PATCH 02/20] target/arm: Correct encoding of Debug Communications Channel registers We don't implement the Debug Communications Channel (DCC), but we do attempt to provide dummy versions of its system registers so that software that tries to access them doesn't fall over. However, we got the tx/rx register definitions wrong. These should be: AArch32: DBGDTRTX p14 0 c0 c5 0 (on writes) DBGDTRRX p14 0 c0 c5 0 (on reads) AArch64: DBGDTRTX_EL0 2 3 0 5 0 (on writes) DBGDTRRX_EL0 2 3 0 5 0 (on reads) DBGDTR_EL0 2 3 0 4 0 (reads and writes) where DBGDTRTX and DBGDTRRX are effectively different names for the same 32-bit register, which has tx behaviour on writes and rx behaviour on reads. The AArch64-only DBGDTR_EL0 is a 64-bit wide register whose top and bottom halves map to the DBGDTRRX and DBGDTRTX registers. Currently we have just one cpreg struct, which: * calls itself DBGDTR_EL0 * uses the DBGDTRTX_EL0/DBGDTRRX_EL0 encoding * is marked as ARM_CP_STATE_BOTH but has the wrong opc1 value for AArch32 * is implemented as RAZ/WI Correct the encoding so: * we name the DBGDTRTX/DBGDTRRX register correctly * we split it into AA64 and AA32 versions so we can get the AA32 encoding right * we implement DBGDTR_EL0 at its correct encoding Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2986 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250708141049.778361-1-peter.maydell@linaro.org --- target/arm/debug_helper.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index 69fb1d0d9f..aee06d4d42 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -988,11 +988,20 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */ - { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14, + /* Architecturally DBGDTRTX is named DBGDTRRX when used for reads */ + { .name = "DBGDTRTX_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, .access = PL0_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DBGDTRTX", .state = ARM_CP_STATE_AA32, .cp = 14, + .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* This is AArch64-only and is a combination of DBGDTRTX and DBGDTRRX */ + { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 4, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, /* * OSECCR_EL1 provides a mechanism for an operating system * to access the contents of EDECCR. EDECCR is not implemented though, From 8ccd35f25cdf2e03f44585a11b7daf93d1d46a3a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 21 Jul 2025 10:07:53 +0100 Subject: [PATCH 03/20] hw/misc/ivshmem-pci: Improve error handling Coverity points out that the ivshmem-pci code has some error handling cases where it incorrectly tries to use an invalid filedescriptor. These generally happen because ivshmem_recv_msg() calls qemu_chr_fe_get_msgfd(), which might return -1, but the code in process_msg() generally assumes that the file descriptor was provided when it was supposed to be. In particular: * the error case in process_msg() only needs to close the fd if one was provided * process_msg_shmem() should fail if no fd was provided Coverity: CID 1508726 Signed-off-by: Peter Maydell Reviewed-by: Markus Armbruster Message-id: 20250711145012.1521936-1-peter.maydell@linaro.org --- hw/misc/ivshmem-pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/misc/ivshmem-pci.c b/hw/misc/ivshmem-pci.c index 5a10bca633..d47ae739d6 100644 --- a/hw/misc/ivshmem-pci.c +++ b/hw/misc/ivshmem-pci.c @@ -479,6 +479,11 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) struct stat buf; size_t size; + if (fd < 0) { + error_setg(errp, "server didn't provide fd with shared memory message"); + return; + } + if (s->ivshmem_bar2) { error_setg(errp, "server sent unexpected shared memory message"); close(fd); @@ -553,7 +558,9 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { error_setg(errp, "server sent invalid message %" PRId64, msg); - close(fd); + if (fd >= 0) { + close(fd); + } return; } From 32d8fb61e5a1fb5feeecce85d461be019cc30a54 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 21 Jul 2025 10:07:53 +0100 Subject: [PATCH 04/20] target/arm: Provide always-false kvm_arm_*_supported() stubs for usermode If you try to build aarch64-linux-user with clang and --enable-debug then it fails to compile: ld: libqemu-aarch64-linux-user.a.p/target_arm_cpu64.c.o: in function `cpu_arm_set_sve': ../../target/arm/cpu64.c:321:(.text+0x1254): undefined reference to `kvm_arm_sve_supported' This is a regression introduced in commit f86d4220, which switched the kvm-stub.c file away from being built for all arm targets to only being built for system emulation binaries. It doesn't affect gcc, presumably because even at -O0 gcc folds away the always-false kvm_enabled() condition but clang does not. We would prefer not to build kvm-stub.c once for usermode and once for system-emulation binaries, and we can't build it just once for both because it includes cpu.h. So instead provide always-false versions of the five functions that are valid to call without KVM support in kvm_arm.h. Fixes: f86d42205c2eba ("target/arm/meson: accelerator files are not needed in user mode") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3033 Signed-off-by: Peter Maydell Reviewed-by: Pierrick Bouvier Message-id: 20250714135152.1896214-1-peter.maydell@linaro.org --- target/arm/kvm_arm.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index b4cad05155..6a9b6374a6 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -161,6 +161,14 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu); */ void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp); +/* + * These "is some KVM subfeature enabled?" functions may be called + * when KVM support is not present, including in the user-mode + * emulators. The kvm-stub.c file is only built into the system + * emulators, so for user-mode emulation we provide "always false" + * stubs here. + */ +#ifndef CONFIG_USER_ONLY /** * kvm_arm_aarch32_supported: * @@ -197,6 +205,33 @@ bool kvm_arm_mte_supported(void); * Returns true if KVM can enable EL2 and false otherwise. */ bool kvm_arm_el2_supported(void); +#else + +static inline bool kvm_arm_aarch32_supported(void) +{ + return false; +} + +static inline bool kvm_arm_pmu_supported(void) +{ + return false; +} + +static inline bool kvm_arm_sve_supported(void) +{ + return false; +} + +static inline bool kvm_arm_mte_supported(void) +{ + return false; +} + +static inline bool kvm_arm_el2_supported(void) +{ + return false; +} +#endif /** * kvm_arm_get_max_vm_ipa_size: From e74aad9f81cc2bfee2057086610e21bd98e9c5a5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 21 Jul 2025 10:07:53 +0100 Subject: [PATCH 05/20] host-utils: Drop workaround for buggy Apple Clang __builtin_subcll() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit b0438861efe ("host-utils: Avoid using __builtin_subcll on buggy versions of Apple Clang") we added a workaround for a bug in Apple Clang 14 where its __builtin_subcll() implementation was wrong. This bug was only present in Apple Clang 14, not in upstream clang, and is not present in Apple Clang versions 15 and newer. Since commit 4e035201 we have required at least Apple Clang 15, so we no longer build with the buggy versions. We can therefore drop the workaround. This is effectively a revert of b0438861efe. This should not be backported to stable branches, which may still need to support Apple Clang 14. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3030 Signed-off-by: Peter Maydell Reviewed-by: Thomas Huth Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-id: 20250714145033.1908788-1-peter.maydell@linaro.org --- include/qemu/compiler.h | 13 ------------- include/qemu/host-utils.h | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 65b89958d3..1c2b673c05 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -182,19 +182,6 @@ #define QEMU_DISABLE_CFI #endif -/* - * Apple clang version 14 has a bug in its __builtin_subcll(); define - * BUILTIN_SUBCLL_BROKEN for the offending versions so we can avoid it. - * When a version of Apple clang which has this bug fixed is released - * we can add an upper bound to this check. - * See https://gitlab.com/qemu-project/qemu/-/issues/1631 - * and https://gitlab.com/qemu-project/qemu/-/issues/1659 for details. - * The bug never made it into any upstream LLVM releases, only Apple ones. - */ -#if defined(__apple_build_version__) && __clang_major__ >= 14 -#define BUILTIN_SUBCLL_BROKEN -#endif - #if __has_attribute(annotate) #define QEMU_ANNOTATE(x) __attribute__((annotate(x))) #else diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 4d28fa22cf..dd558589cb 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -677,7 +677,7 @@ static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry) */ static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow) { -#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN) +#if __has_builtin(__builtin_subcll) unsigned long long b = *pborrow; x = __builtin_subcll(x, y, b, &b); *pborrow = b & 1; From 30dbcd9283988ba352181cb42c6a69ae32075363 Mon Sep 17 00:00:00 2001 From: Jackson Donaldson Date: Mon, 21 Jul 2025 10:07:53 +0100 Subject: [PATCH 06/20] hw/misc/max78000_aes: Comment Internal Key Storage Coverity Scan noted an unusual pattern in the MAX78000 aes device, with duplicated calls to set_decrypt. This commit adds a comment noting why the implementation is correct. Signed-off-by: Jackson Donaldson Message-id: 20250716002622.84685-1-jcksn@duck.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/misc/max78000_aes.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/misc/max78000_aes.c b/hw/misc/max78000_aes.c index 0bfb2f02b5..d883ddd2b6 100644 --- a/hw/misc/max78000_aes.c +++ b/hw/misc/max78000_aes.c @@ -79,6 +79,12 @@ static void max78000_aes_do_crypto(Max78000AesState *s) keydata += 8; } + /* + * The MAX78000 AES engine stores an internal key, which it uses only + * for decryption. This results in the slighly odd looking pairs of + * set_encrypt and set_decrypt calls below; s->internal_key is + * being stored for later use in both cases. + */ AES_KEY key; if ((s->ctrl & TYPE) == 0) { AES_set_encrypt_key(keydata, keylen, &key); From 012bf8ad134ecec5d7e68101c216204107fd9741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 21 Jul 2025 10:07:54 +0100 Subject: [PATCH 07/20] docs: Fix Aspeed title MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ad8e0e8a0088 removed the "======" underlining the file title which broke documentation rendering. Add it back. Fixes: ad8e0e8a0088 ("docs: add support for gb200-bmc") Cc: Ed Tanous Reported-by: Peter Maydell Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Huth Reviewed-by: Ed Tanous Message-id: 20250715061904.97540-1-clg@redhat.com Signed-off-by: Peter Maydell --- docs/system/arm/aspeed.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index bec0a1dfa8..bf18c56347 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -1,4 +1,5 @@ Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``gb200nvl-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) +==================================================================================================================================================================================================================================================================================================================================================================================================================================== The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. They are based on different releases of the From 2b5a9bbbadb39750584f36e6bee3a36ebe134418 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:23 +0100 Subject: [PATCH 08/20] target/arm: Add BFADD, BFSUB, BFMUL (unpredicated) FEAT_SVE_B16B16 adds bfloat16 versions of the SVE floating point (unpredicated) instructions, which are encoded via sz==0b00. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-2-peter.maydell@linaro.org --- target/arm/tcg/helper.h | 3 +++ target/arm/tcg/translate-sve.c | 6 +++++- target/arm/tcg/vec_helper.c | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h index 0a006d9514..d9ca5b7c56 100644 --- a/target/arm/tcg/helper.h +++ b/target/arm/tcg/helper.h @@ -728,16 +728,19 @@ DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fadd_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fsub_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 7b575734fd..f00cccf154 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -190,6 +190,10 @@ static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, arg_rrr_esz *a, int data) { + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -4146,7 +4150,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) #define DO_FP3(NAME, name) \ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ - NULL, gen_helper_gvec_##name##_h, \ + gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ }; \ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index bae6165b50..76a9ab0da3 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -1467,16 +1467,19 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } +DO_3OP(gvec_fadd_b16, bfloat16_add, float16) DO_3OP(gvec_fadd_h, float16_add, float16) DO_3OP(gvec_fadd_s, float32_add, float32) DO_3OP(gvec_fadd_d, float64_add, float64) DO_3OP(gvec_bfadd, bfloat16_add, bfloat16) +DO_3OP(gvec_fsub_b16, bfloat16_sub, float16) DO_3OP(gvec_fsub_h, float16_sub, float16) DO_3OP(gvec_fsub_s, float32_sub, float32) DO_3OP(gvec_fsub_d, float64_sub, float64) DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16) +DO_3OP(gvec_fmul_b16, bfloat16_mul, float16) DO_3OP(gvec_fmul_h, float16_mul, float16) DO_3OP(gvec_fmul_s, float32_mul, float32) DO_3OP(gvec_fmul_d, float64_mul, float64) From 86fa06f8d9f953252a7919fa56a402d789bf1b78 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:24 +0100 Subject: [PATCH 09/20] target/arm: Add BFADD, BFSUB, BFMUL, BFMAXNM, BFMINNM (predicated) FEAT_SVE_B16B16 adds bfloat16 versions of the SVE floating point (predicated) instructions, which are encoded via sz=0b00. Add BFADD, BFSUB, BFMUL, BFMAXNM, BFMINNM; these are all the insns in this group which do not change behaviour for AH=1. We will deal with BFMAX/BFMIN (which do have different AH=1 behaviour) in a following commit. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-3-peter.maydell@linaro.org --- target/arm/tcg/helper-sve.h | 10 ++++++++++ target/arm/tcg/sve_helper.c | 5 +++++ target/arm/tcg/translate-sve.c | 22 +++++++++++++++++----- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index c36090d13d..d612bcaded 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1196,6 +1196,8 @@ DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fadd_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, @@ -1203,6 +1205,8 @@ DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fsub_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, @@ -1210,6 +1214,8 @@ DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmul_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG, @@ -1252,6 +1258,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fminnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, @@ -1259,6 +1267,8 @@ DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmaxnum_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 43b872c7fd..a229503bc2 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4629,14 +4629,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ } while (i != 0); \ } +DO_ZPZZ_FP(sve_fadd_b16, uint16_t, H1_2, bfloat16_add) DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add) +DO_ZPZZ_FP(sve_fsub_b16, uint16_t, H1_2, bfloat16_sub) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub) +DO_ZPZZ_FP(sve_fmul_b16, uint16_t, H1_2, bfloat16_mul) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul) @@ -4661,10 +4664,12 @@ DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) +DO_ZPZZ_FP(sve_fminnum_b16, uint16_t, H1_2, bfloat16_minnum) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) +DO_ZPZZ_FP(sve_fmaxnum_b16, uint16_t, H1_2, bfloat16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index f00cccf154..2739c226d7 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -407,6 +407,10 @@ static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, arg_rprr_esz *a) { + /* These insns use MO_8 to encode BFloat16. */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); } @@ -4206,13 +4210,21 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ name##_zpzz_fns[a->esz], a) -DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) -DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) -DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) +/* Similar, but for insns where sz == 0 encodes bfloat16 */ +#define DO_ZPZZ_FP_B16(NAME, FEAT, name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) + +DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd) +DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub) +DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul) DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) -DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) -DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) +DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum) +DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) From 279438560ba8575266e9105202c6e87044d24885 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:25 +0100 Subject: [PATCH 10/20] target/arm: Add BFMIN, BFMAX (predicated) FEAT_SVE_B16B16 adds bfloat16 versions of the SVE floating point (predicated) instructions, which are encoded via sz=0b00. Add the BFMAX and BFMIN insns. These have separate behaviour for AH=1 and AH=0; we have already implemented the AH=1 helper for the SME2 versions of these insns. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-4-peter.maydell@linaro.org --- target/arm/tcg/helper-sve.h | 8 ++++++++ target/arm/tcg/sve_helper.c | 4 ++++ target/arm/tcg/translate-sve.c | 17 +++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index d612bcaded..cb6c2355e5 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1230,6 +1230,8 @@ DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, @@ -1237,6 +1239,8 @@ DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, @@ -1244,6 +1248,8 @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmin_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, @@ -1251,6 +1257,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_6(sve_ah_fmax_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index a229503bc2..1a56fa86d9 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4648,18 +4648,22 @@ DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div) +DO_ZPZZ_FP(sve_fmin_b16, uint16_t, H1_2, bfloat16_min) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min) +DO_ZPZZ_FP(sve_fmax_b16, uint16_t, H1_2, bfloat16_max) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) +DO_ZPZZ_FP(sve_ah_fmin_b16, uint16_t, H1_2, helper_sme2_ah_fmin_b16) DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) +DO_ZPZZ_FP(sve_ah_fmax_b16, uint16_t, H1_2, helper_sme2_ah_fmax_b16) DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 2739c226d7..27af3df9a4 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4218,11 +4218,24 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, }; \ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) +#define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_b16, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \ + gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \ + gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \ + s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \ + name##_zpzz_fns[a->esz], a) + DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd) DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub) DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul) -DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) -DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) +DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin) +DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax) DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum) DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd) From f71c3f470f48bece6d0601171bfda63a0b746879 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:26 +0100 Subject: [PATCH 11/20] target/arm: Add BFMUL (indexed) FEAT_SVE_B16B16 adds a bfloat16 version of the FMUL insn in the floating-point multiply (indexed) instruction group. The encoding is slightly bespoke; in our implementation we use MO_8 to indicate bfloat16, as with the other B16B16 insns. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-5-peter.maydell@linaro.org --- target/arm/tcg/helper.h | 2 ++ target/arm/tcg/sve.decode | 1 + target/arm/tcg/translate-sve.c | 2 +- target/arm/tcg/vec_helper.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h index d9ca5b7c56..4da32db902 100644 --- a/target/arm/tcg/helper.h +++ b/target/arm/tcg/helper.h @@ -823,6 +823,8 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_5(gvec_fmul_idx_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index 2efd5f57e4..a76f2236f4 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -1062,6 +1062,7 @@ FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 ### SVE FP Multiply Indexed Group # SVE floating-point multiply (indexed) +FMUL_zzx 01100100 0. 1 ..... 001010 ..... ..... @rrx_3 esz=0 FMUL_zzx 01100100 0. 1 ..... 001000 ..... ..... @rrx_3 esz=1 FMUL_zzx 01100100 10 1 ..... 001000 ..... ..... @rrx_2 esz=2 FMUL_zzx 01100100 11 1 ..... 001000 ..... ..... @rrx_1 esz=3 diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 27af3df9a4..918cf6e1bd 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -3907,7 +3907,7 @@ TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, */ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { - NULL, gen_helper_gvec_fmul_idx_h, + gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h, gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, }; TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 76a9ab0da3..33a136b90a 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -1785,6 +1785,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \ #define nop(N, M, S) (M) +DO_FMUL_IDX(gvec_fmul_idx_b16, nop, bfloat16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2) DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4) DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8) From 929bec5581966c43d083588f9be38af48150c4fe Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:27 +0100 Subject: [PATCH 12/20] target/arm: Add BFMLA, BFMLS (vectors) FEAT_SVE_B16B16 adds bfloat16 versions of the FMLA and FMLS insns in the "SVE floating-point multiply-accumulate writing addend" group, encoded as sz=0b00. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-6-peter.maydell@linaro.org --- target/arm/tcg/helper-sve.h | 14 +++++++ target/arm/tcg/sve_helper.c | 69 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sve.c | 21 ++++++++--- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index cb6c2355e5..5e4b7fd8cf 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1541,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1548,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1555,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1562,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1569,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, @@ -1576,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, @@ -1583,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) +DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, fpst, i32) DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 1a56fa86d9..105cc5dd12 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -5099,6 +5099,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) #undef DO_ZPZ_FP +static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3, int flags) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)(vm + H1_2(i)); + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + r = bfloat16_muladd(e1, e2, e3, flags, status); + *(uint16_t *)(vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); +} + +void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); +} + +void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); +} + static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3, int flags) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 918cf6e1bd..37ecbc2b7c 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4368,19 +4368,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1), a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + #define DO_FMLA(NAME, name, ah_name) \ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \ - NULL, gen_helper_sve_##ah_name##_h, \ + gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \ }; \ - TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \ - s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \ - a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) + TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \ + s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz]) /* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */ DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz) From 67fbc4c8079226eb9e47369cc45eb3fe56c3c9c3 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:28 +0100 Subject: [PATCH 13/20] target/arm: Add BFMLA, BFMLS (indexed) FEAT_SVE_B16B16 adds bfloat16 versions of the FMLA and FMLS insns in the SVE floating-point multiply-add (indexed) insn group. Implement these. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-7-peter.maydell@linaro.org --- target/arm/tcg/sve.decode | 2 ++ target/arm/tcg/translate-sve.c | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index a76f2236f4..a77b725c87 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -1052,9 +1052,11 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \ ### SVE FP Multiply-Add Indexed Group # SVE floating-point multiply-add (indexed) +FMLA_zzxz 01100100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=0 FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1 FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2 FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3 +FMLS_zzxz 01100100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=0 FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1 FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2 FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3 diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 37ecbc2b7c..fc76624b5a 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -3883,24 +3883,31 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) *** SVE Floating Point Multiply-Add Indexed Group */ +static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + /* These insns use MO_8 to encode BFloat16 */ + if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) { + return false; + } + return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index, + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); +} + static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = { - NULL, gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h, gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d }; -TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz]) static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = { - { NULL, NULL }, + { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx }, { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h }, { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s }, { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d }, }; -TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz, - fmls_idx_fns[a->esz][s->fpcr_ah], - a->rd, a->rn, a->rm, a->ra, a->index, - a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) +TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a, + fmls_idx_fns[a->esz][s->fpcr_ah]) /* *** SVE Floating Point Multiply Indexed Group From 17f6436822ff600cae4590c4b06b3321c97f1f42 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:29 +0100 Subject: [PATCH 14/20] target/arm: Correct sense of FPCR.AH test for FMAXQV and FMINQV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we implemented the FMAXQV and FMINQV insns we accidentally inverted the sense of the FPCR.AH test, so we gave the AH=1 behaviour when FPCR.AH was zero, and vice-versa. (The difference is limited to handling of negative zero and NaN inputs.) Fixes: 1de7ecfc12d05 ("target/arm: Implement FADDQV, F{MIN, MAX}{NM}QV for SVE2p1") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-id: 20250718173032.2498900-8-peter.maydell@linaro.org --- target/arm/tcg/translate-sve.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index fc76624b5a..2ed440aff1 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -4020,7 +4020,7 @@ static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = { gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d, }; TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, - (s->fpcr_ah ? fmaxqv_fns : fmaxqv_ah_fns)[a->esz], a, 0, + (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) static gen_helper_gvec_3_ptr * const fminqv_fns[4] = { @@ -4032,7 +4032,7 @@ static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = { gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d, }; TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz, - (s->fpcr_ah ? fminqv_fns : fminqv_ah_fns)[a->esz], a, 0, + (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64) /* From 07327d5f451162a841747836ff05cc6dd6e8c023 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:30 +0100 Subject: [PATCH 15/20] target/arm: Don't nest H() macro calls in SVE DO_REDUCE In the part of the SVE DO_REDUCE macro used by the SVE2p1 FMAXQV, FMINQV, etc insns, we incorrectly applied the H() macro twice when calculating an offset to add to the vn pointer. This has no effect on little-endian hosts but on big-endian hosts the two invocations will cancel each other out and we will access the wrong part of the array. The "s * 16" part of the expression is already aligned, so we only need to use the H macro on the "e". Correct the macro usage. Fixes: 1de7ecfc12d05 ("target/arm: Implement FADDQV, F{MIN, MAX}{NM}QV for SVE2p1") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-9-peter.maydell@linaro.org --- target/arm/tcg/sve_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 105cc5dd12..bf894f0bf1 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4509,7 +4509,7 @@ void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \ TYPE data[ARM_MAX_VQ]; \ for (unsigned s = 0; s < segments; s++) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \ - TYPE nn = *(TYPE *)(vn + H(s * 16 + H(e))); \ + TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \ data[s] = (pg >> e) & 1 ? nn : IDENT; \ } \ *(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \ From 82a1c5c661ef9ab567b7946b75240963c153a3b0 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:31 +0100 Subject: [PATCH 16/20] target/arm: Honour FPCR.AH=1 default NaN value in FMAXNMQV, FMINNMQV The FMAXNMQV and FMINNMQV insns use the default NaN as their identity value for inactive source vector elements. We open-coded this in sve_helper.c, hoping to avoid a function call. However, this fails to account for FPCR.AH=1 changing the default NaN value to set the sign bit. Use a call to floatN_default_nan() to obtain this value. Fixes: 1de7ecfc12d05 ("target/arm: Implement FADDQV, F{MIN, MAX}{NM}QV for SVE2p1") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-10-peter.maydell@linaro.org --- target/arm/tcg/sve_helper.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index bf894f0bf1..803f0a094d 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4484,33 +4484,35 @@ static TYPE FUNC##_reduce(TYPE *data, float_status *status, uintptr_t n) \ } \ } \ uint64_t helper_sve_##NAME##v_##SUF(void *vn, void *vg, \ - float_status *s, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + TYPE ident = IDENT; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ do { \ TYPE nn = *(TYPE *)(vn + H(i)); \ - *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \ + *(TYPE *)((void *)data + i) = (pg & 1 ? nn : ident); \ i += sizeof(TYPE), pg >>= sizeof(TYPE); \ } while (i & 15); \ } \ for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((void *)data + i) = IDENT; \ + *(TYPE *)((void *)data + i) = ident; \ } \ - return FUNC##_reduce(data, s, maxsz / sizeof(TYPE)); \ + return FUNC##_reduce(data, status, maxsz / sizeof(TYPE)); \ } \ void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \ float_status *status, uint32_t desc) \ { \ unsigned oprsz = simd_oprsz(desc), segments = oprsz / 16; \ + TYPE ident = IDENT; \ for (unsigned e = 0; e < 16; e += sizeof(TYPE)) { \ TYPE data[ARM_MAX_VQ]; \ for (unsigned s = 0; s < segments; s++) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \ TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \ - data[s] = (pg >> e) & 1 ? nn : IDENT; \ + data[s] = (pg >> e) & 1 ? nn : ident; \ } \ *(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \ } \ @@ -4521,14 +4523,17 @@ DO_REDUCE(fadd,h, float16, H1_2, float16_add, float16_zero) DO_REDUCE(fadd,s, float32, H1_4, float32_add, float32_zero) DO_REDUCE(fadd,d, float64, H1_8, float64_add, float64_zero) -/* Identity is floatN_default_nan, without the function call. */ -DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, 0x7E00) -DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, 0x7FC00000) -DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) +/* + * We can't avoid the function call for the default NaN value, because + * it changes when FPCR.AH is set. + */ +DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, float16_default_nan(status)) +DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, float32_default_nan(status)) +DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, float64_default_nan(status)) -DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, 0x7E00) -DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, 0x7FC00000) -DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, float16_default_nan(status)) +DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, float32_default_nan(status)) +DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, float64_default_nan(status)) DO_REDUCE(fmin,h, float16, H1_2, float16_min, float16_infinity) DO_REDUCE(fmin,s, float32, H1_4, float32_min, float32_infinity) From 082933a1f7d3c8e4a9e999c3d284928ef866c67d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 18 Jul 2025 18:30:32 +0100 Subject: [PATCH 17/20] target/arm: Make LD1Q decode and trans fn agree about a->u For the LD1Q instruction (gather load of quadwords) we use the LD1_zprz pattern with MO_128 elements. At this element size there is no signed vs unsigned distinction, and we only set the 'u' bit in the arg_LD1_zprz struct because we share the code and decode struct with smaller element sizes. However, we set u=0 in the decode pattern line but then accidentally asserted that it was 1 in the trans function. Since our usual convention is that the "default" is unsigned and we only mark operations as signed when they really do need to extend, change the decode pattern line to set u=1 to match the assert. Fixes: d2aa9a804ee6 ("target/arm: Implement LD1Q, ST1Q for SVE2p1") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250718173032.2498900-11-peter.maydell@linaro.org --- target/arm/tcg/sve.decode | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index a77b725c87..aea7f51973 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -1345,7 +1345,7 @@ LD1_zprz 1100010 11 1. ..... 11. ... ..... ..... \ # LD1Q LD1_zprz 1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 \ - &rprr_gather_load u=0 ff=0 xs=2 esz=4 msz=4 scale=0 + &rprr_gather_load u=1 ff=0 xs=2 esz=4 msz=4 scale=0 # SVE 64-bit gather load (vector plus immediate) LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \ From f19310b23a00b5c19f930e4d57fc298744d11740 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 15 Jul 2025 00:01:38 +0800 Subject: [PATCH 18/20] hvf: arm: Add permission check in GIC sysreg handlers Quoting Peter Maydell: " hvf_sysreg_read_cp() and hvf_sysreg_write_cp() do not check the .access field of the ARMCPRegInfo to ensure that they forbid writes to registers that are marked with a .access field that says they're read-only (and ditto reads to write-only registers). " Before we add more registers in GIC sysreg handlers, let's get it correct by adding the .access checks to hvf_sysreg_read_cp() and hvf_sysreg_write_cp(). With that, a sysreg access with invalid permission will result in an UNDEFINED exception. Suggested-by: Peter Maydell Signed-off-by: Zenghui Yu Message-id: 20250714160139.10404-2-zenghui.yu@linux.dev Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/hvf/hvf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 8f93e42b34..861657df96 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1263,6 +1263,9 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, true)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) { return false; @@ -1543,6 +1546,9 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); if (ri) { + if (!cp_access_ok(1, ri, false)) { + return false; + } if (ri->accessfn) { if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) { return false; From e6da704b711d5d731e4d933ad56cbbc25ee0a825 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 15 Jul 2025 00:01:39 +0800 Subject: [PATCH 19/20] hvf: arm: Emulate ICC_RPR_EL1 accesses properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a2260983c655 ("hvf: arm: Add support for GICv3") added GICv3 support by implementing emulation for a few system registers. ICC_RPR_EL1 was defined but not plugged in the sysreg handlers (for no good reason). Fix it. Fixes: a2260983c655 ("hvf: arm: Add support for GICv3") Signed-off-by: Zenghui Yu Reviewed-by: Philippe Mathieu-Daudé Message-id: 20250714160139.10404-3-zenghui.yu@linux.dev Signed-off-by: Peter Maydell --- target/arm/hvf/hvf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 861657df96..bd6b5d11de 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1361,6 +1361,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1678,6 +1679,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: From 7724ca9a772594b96939ff549c74f46e11d7870b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 16 Jul 2025 19:28:11 +0200 Subject: [PATCH 20/20] accel/hvf: Display executable bit as 'X' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Developers are accustomed to read RWX, not RWE. Replace E -> X. Reported-by: Alex Bennée Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Alex Bennée Reviewed-by: Mads Ynddal Reviewed-by: Xiaoyao Li Signed-off-by: Peter Maydell --- accel/hvf/hvf-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index e67a8105a6..0a4b498e83 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -84,7 +84,7 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, flags & HV_MEMORY_READ ? 'R' : '-', flags & HV_MEMORY_WRITE ? 'W' : '-', - flags & HV_MEMORY_EXEC ? 'E' : '-'); + flags & HV_MEMORY_EXEC ? 'X' : '-'); ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); assert_hvf_ok(ret); return 0;