mirror of
https://github.com/qemu/qemu.git
synced 2025-08-02 12:45:00 +00:00
target-arm queue:
* hvf: arm: Remove $pc from trace_hvf_data_abort() * target/arm: Correct encoding of Debug Communications Channel registers * hw/misc/ivshmem-pci: Improve error handling * target/arm: Provide always-false kvm_arm_*_supported() stubs for usermode * host-utils: Drop workaround for buggy Apple Clang __builtin_subcll() * hw/misc/max78000_aes: Comment Internal Key Storage * docs: Fix Aspeed title * Implement a handful of missing FEAT_SVE_B16_B16 insns * Fix bugs in FMAXQV, FMINQV, etc * Fix assert in LD1Q decode * hvf: arm: Add permission check in GIC sysreg handlers * hvf: arm: Emulate ICC_RPR_EL1 accesses properly * accel/hvf: Display executable bit as 'X' -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmh+OlgZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3klRD/0QvWyi0z0wbBdIKGi8kury VP/S1FcqP2w9l0jUAmINxsOYu/+ql8w3t6m2jxlTOYZc9rNauQrxRYGD1GdqfJVn VCYN47OHRye+iMH6c5jdKyJTi3n2uvT2g8anh2Yt18xOey45Pwr2fTs8rje3N5sH fvv1mNaeGyA7wgXEY9Z4tEvW19ZPsuKIBd4Ea7klDXwUpTmmQkJMVTKyz7/TefqK YicpM9E4dMxwNvW/8zrsVH505E4s15OytIjhDci30v2M7Uh3tUIRKo+s44Kk7cEq HIwO/ra1HEhp8xGU2UfRf643EF1k7A0VUgetHSxYbi1gIknjiSU8Ohnd268CgY7G /UbjgjFu5mcJgBvoNCwGy0iuCkEfRZ+veq24OjL6GbMvt2utCm9Rumu5KLemLpvW mB4RexjbT+FDldhz8Ub42sR96UpZ0TSEfwjOswHibA4Zk4o2S6fRv0OqaBMB2dKU MhcMdBdPXe3SmecRlYoqde+RCUyIOMD46uiJYrBhG6Nfn6SgcnS+ZGB960veaH2r p5lvyJIBn3/a1KwOET9z5Gp3C6l4TXgJjIaCB4qAWHxeBR/AIobRC71heCc0uPU+ J3Nm5/p6rcz0vjzs1To0VuXM57qNpQJWu0Un+8CDjpXJ0piyBk4o2iu1dw26bsMs yrgqlMfDsXNlK/yUdykAbw== =rQ+N -----END PGP SIGNATURE----- Merge tag 'pull-target-arm-20250721' of https://gitlab.com/pm215/qemu into staging target-arm queue: * hvf: arm: Remove $pc from trace_hvf_data_abort() * target/arm: Correct encoding of Debug Communications Channel registers * hw/misc/ivshmem-pci: Improve error handling * target/arm: Provide always-false kvm_arm_*_supported() stubs for usermode * host-utils: Drop workaround for buggy Apple Clang __builtin_subcll() * hw/misc/max78000_aes: Comment Internal Key 
Storage * docs: Fix Aspeed title * Implement a handful of missing FEAT_SVE_B16_B16 insns * Fix bugs in FMAXQV, FMINQV, etc * Fix assert in LD1Q decode * hvf: arm: Add permission check in GIC sysreg handlers * hvf: arm: Emulate ICC_RPR_EL1 accesses properly * accel/hvf: Display executable bit as 'X' # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmh+OlgZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3klRD/0QvWyi0z0wbBdIKGi8kury # VP/S1FcqP2w9l0jUAmINxsOYu/+ql8w3t6m2jxlTOYZc9rNauQrxRYGD1GdqfJVn # VCYN47OHRye+iMH6c5jdKyJTi3n2uvT2g8anh2Yt18xOey45Pwr2fTs8rje3N5sH # fvv1mNaeGyA7wgXEY9Z4tEvW19ZPsuKIBd4Ea7klDXwUpTmmQkJMVTKyz7/TefqK # YicpM9E4dMxwNvW/8zrsVH505E4s15OytIjhDci30v2M7Uh3tUIRKo+s44Kk7cEq # HIwO/ra1HEhp8xGU2UfRf643EF1k7A0VUgetHSxYbi1gIknjiSU8Ohnd268CgY7G # /UbjgjFu5mcJgBvoNCwGy0iuCkEfRZ+veq24OjL6GbMvt2utCm9Rumu5KLemLpvW # mB4RexjbT+FDldhz8Ub42sR96UpZ0TSEfwjOswHibA4Zk4o2S6fRv0OqaBMB2dKU # MhcMdBdPXe3SmecRlYoqde+RCUyIOMD46uiJYrBhG6Nfn6SgcnS+ZGB960veaH2r # p5lvyJIBn3/a1KwOET9z5Gp3C6l4TXgJjIaCB4qAWHxeBR/AIobRC71heCc0uPU+ # J3Nm5/p6rcz0vjzs1To0VuXM57qNpQJWu0Un+8CDjpXJ0piyBk4o2iu1dw26bsMs # yrgqlMfDsXNlK/yUdykAbw== # =rQ+N # -----END PGP SIGNATURE----- # gpg: Signature made Mon 21 Jul 2025 09:02:16 EDT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * tag 'pull-target-arm-20250721' of https://gitlab.com/pm215/qemu: accel/hvf: Display executable bit as 'X' hvf: arm: Emulate ICC_RPR_EL1 accesses properly hvf: arm: Add permission check in GIC sysreg handlers target/arm: Make LD1Q decode and trans fn agree about a->u target/arm: Honour FPCR.AH=1 default 
NaN value in FMAXNMQV, FMINNMQV target/arm: Don't nest H() macro calls in SVE DO_REDUCE target/arm: Correct sense of FPCR.AH test for FMAXQV and FMINQV target/arm: Add BFMLA, BFMLS (indexed) target/arm: Add BFMLA, BFMLS (vectors) target/arm: Add BFMUL (indexed) target/arm: Add BFMIN, BFMAX (predicated) target/arm: Add BFADD, BFSUB, BFMUL, BFMAXNM, BFMINNM (predicated) target/arm: Add BFADD, BFSUB, BFMUL (unpredicated) docs: Fix Aspeed title hw/misc/max78000_aes: Comment Internal Key Storage host-utils: Drop workaround for buggy Apple Clang __builtin_subcll() target/arm: Provide always-false kvm_arm_*_supported() stubs for usermode hw/misc/ivshmem-pci: Improve error handling target/arm: Correct encoding of Debug Communications Channel registers hvf: arm: Remove $pc from trace_hvf_data_abort() Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
bb153e7960
@ -84,7 +84,7 @@ static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
|
||||
trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags,
|
||||
flags & HV_MEMORY_READ ? 'R' : '-',
|
||||
flags & HV_MEMORY_WRITE ? 'W' : '-',
|
||||
flags & HV_MEMORY_EXEC ? 'E' : '-');
|
||||
flags & HV_MEMORY_EXEC ? 'X' : '-');
|
||||
ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
|
||||
assert_hvf_ok(ret);
|
||||
return 0;
|
||||
|
@ -1,4 +1,5 @@
|
||||
Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``gb200nvl-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``)
|
||||
====================================================================================================================================================================================================================================================================================================================================================================================================================================
|
||||
|
||||
The QEMU Aspeed machines model BMCs of various OpenPOWER systems and
|
||||
Aspeed evaluation boards. They are based on different releases of the
|
||||
|
@ -479,6 +479,11 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
|
||||
struct stat buf;
|
||||
size_t size;
|
||||
|
||||
if (fd < 0) {
|
||||
error_setg(errp, "server didn't provide fd with shared memory message");
|
||||
return;
|
||||
}
|
||||
|
||||
if (s->ivshmem_bar2) {
|
||||
error_setg(errp, "server sent unexpected shared memory message");
|
||||
close(fd);
|
||||
@ -553,7 +558,9 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
|
||||
|
||||
if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
|
||||
error_setg(errp, "server sent invalid message %" PRId64, msg);
|
||||
close(fd);
|
||||
if (fd >= 0) {
|
||||
close(fd);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -79,6 +79,12 @@ static void max78000_aes_do_crypto(Max78000AesState *s)
|
||||
keydata += 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* The MAX78000 AES engine stores an internal key, which it uses only
|
||||
* for decryption. This results in the slightly odd looking pairs of
|
||||
* set_encrypt and set_decrypt calls below; s->internal_key is
|
||||
* being stored for later use in both cases.
|
||||
*/
|
||||
AES_KEY key;
|
||||
if ((s->ctrl & TYPE) == 0) {
|
||||
AES_set_encrypt_key(keydata, keylen, &key);
|
||||
|
@ -182,19 +182,6 @@
|
||||
#define QEMU_DISABLE_CFI
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Apple clang version 14 has a bug in its __builtin_subcll(); define
|
||||
* BUILTIN_SUBCLL_BROKEN for the offending versions so we can avoid it.
|
||||
* When a version of Apple clang which has this bug fixed is released
|
||||
* we can add an upper bound to this check.
|
||||
* See https://gitlab.com/qemu-project/qemu/-/issues/1631
|
||||
* and https://gitlab.com/qemu-project/qemu/-/issues/1659 for details.
|
||||
* The bug never made it into any upstream LLVM releases, only Apple ones.
|
||||
*/
|
||||
#if defined(__apple_build_version__) && __clang_major__ >= 14
|
||||
#define BUILTIN_SUBCLL_BROKEN
|
||||
#endif
|
||||
|
||||
#if __has_attribute(annotate)
|
||||
#define QEMU_ANNOTATE(x) __attribute__((annotate(x)))
|
||||
#else
|
||||
|
@ -677,7 +677,7 @@ static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
|
||||
*/
|
||||
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
|
||||
{
|
||||
#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
|
||||
#if __has_builtin(__builtin_subcll)
|
||||
unsigned long long b = *pborrow;
|
||||
x = __builtin_subcll(x, y, b, &b);
|
||||
*pborrow = b & 1;
|
||||
|
@ -988,11 +988,20 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
|
||||
.opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
|
||||
.access = PL1_RW, .accessfn = access_tdcc,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
/* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */
|
||||
{ .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14,
|
||||
/* Architecturally DBGDTRTX is named DBGDTRRX when used for reads */
|
||||
{ .name = "DBGDTRTX_EL0", .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0,
|
||||
.access = PL0_RW, .accessfn = access_tdcc,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "DBGDTRTX", .state = ARM_CP_STATE_AA32, .cp = 14,
|
||||
.opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
|
||||
.access = PL0_RW, .accessfn = access_tdcc,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
/* This is AArch64-only and is a combination of DBGDTRTX and DBGDTRRX */
|
||||
{ .name = "DBGDTR_EL0", .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 2, .opc1 = 3, .crn = 0, .crm = 4, .opc2 = 0,
|
||||
.access = PL0_RW, .accessfn = access_tdcc,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
/*
|
||||
* OSECCR_EL1 provides a mechanism for an operating system
|
||||
* to access the contents of EDECCR. EDECCR is not implemented though,
|
||||
|
@ -1263,6 +1263,9 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
|
||||
|
||||
ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
|
||||
if (ri) {
|
||||
if (!cp_access_ok(1, ri, true)) {
|
||||
return false;
|
||||
}
|
||||
if (ri->accessfn) {
|
||||
if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) {
|
||||
return false;
|
||||
@ -1358,6 +1361,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val)
|
||||
case SYSREG_ICC_IGRPEN0_EL1:
|
||||
case SYSREG_ICC_IGRPEN1_EL1:
|
||||
case SYSREG_ICC_PMR_EL1:
|
||||
case SYSREG_ICC_RPR_EL1:
|
||||
case SYSREG_ICC_SGI0R_EL1:
|
||||
case SYSREG_ICC_SGI1R_EL1:
|
||||
case SYSREG_ICC_SRE_EL1:
|
||||
@ -1543,6 +1547,9 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val)
|
||||
ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
|
||||
|
||||
if (ri) {
|
||||
if (!cp_access_ok(1, ri, false)) {
|
||||
return false;
|
||||
}
|
||||
if (ri->accessfn) {
|
||||
if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) {
|
||||
return false;
|
||||
@ -1672,6 +1679,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
|
||||
case SYSREG_ICC_IGRPEN0_EL1:
|
||||
case SYSREG_ICC_IGRPEN1_EL1:
|
||||
case SYSREG_ICC_PMR_EL1:
|
||||
case SYSREG_ICC_RPR_EL1:
|
||||
case SYSREG_ICC_SGI0R_EL1:
|
||||
case SYSREG_ICC_SGI1R_EL1:
|
||||
case SYSREG_ICC_SRE_EL1:
|
||||
@ -2005,7 +2013,7 @@ int hvf_vcpu_exec(CPUState *cpu)
|
||||
uint32_t cm = (syndrome >> 8) & 0x1;
|
||||
uint64_t val = 0;
|
||||
|
||||
trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address,
|
||||
trace_hvf_data_abort(hvf_exit->exception.virtual_address,
|
||||
hvf_exit->exception.physical_address, isv,
|
||||
iswrite, s1ptw, len, srt);
|
||||
|
||||
|
@ -2,7 +2,7 @@ hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1,
|
||||
hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)"
|
||||
hvf_inject_fiq(void) "injecting FIQ"
|
||||
hvf_inject_irq(void) "injecting IRQ"
|
||||
hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]"
|
||||
hvf_data_abort(uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]"
|
||||
hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64
|
||||
hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")"
|
||||
hvf_unknown_hvc(uint64_t pc, uint64_t x0) "pc=0x%"PRIx64" unknown HVC! 0x%016"PRIx64
|
||||
|
@ -161,6 +161,14 @@ void kvm_arm_add_vcpu_properties(ARMCPU *cpu);
|
||||
*/
|
||||
void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp);
|
||||
|
||||
/*
|
||||
* These "is some KVM subfeature enabled?" functions may be called
|
||||
* when KVM support is not present, including in the user-mode
|
||||
* emulators. The kvm-stub.c file is only built into the system
|
||||
* emulators, so for user-mode emulation we provide "always false"
|
||||
* stubs here.
|
||||
*/
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
/**
|
||||
* kvm_arm_aarch32_supported:
|
||||
*
|
||||
@ -197,6 +205,33 @@ bool kvm_arm_mte_supported(void);
|
||||
* Returns true if KVM can enable EL2 and false otherwise.
|
||||
*/
|
||||
bool kvm_arm_el2_supported(void);
|
||||
#else
|
||||
|
||||
/*
 * Stub variant of kvm_arm_aarch32_supported() from the #else branch:
 * takes no arguments and unconditionally returns false.
 */
static inline bool kvm_arm_aarch32_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kvm_arm_pmu_supported() from the #else branch:
 * takes no arguments and unconditionally returns false.
 */
static inline bool kvm_arm_pmu_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kvm_arm_sve_supported() from the #else branch:
 * takes no arguments and unconditionally returns false.
 */
static inline bool kvm_arm_sve_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kvm_arm_mte_supported() from the #else branch:
 * takes no arguments and unconditionally returns false.
 */
static inline bool kvm_arm_mte_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kvm_arm_el2_supported() from the #else branch:
 * takes no arguments and unconditionally returns false.
 */
static inline bool kvm_arm_el2_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kvm_arm_get_max_vm_ipa_size:
|
||||
|
@ -1196,6 +1196,8 @@ DEF_HELPER_FLAGS_5(sve_fcmne0_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(sve_fcmne0_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fadd_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fadd_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG,
|
||||
@ -1203,6 +1205,8 @@ DEF_HELPER_FLAGS_6(sve_fadd_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fadd_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fsub_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fsub_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG,
|
||||
@ -1210,6 +1214,8 @@ DEF_HELPER_FLAGS_6(sve_fsub_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fsub_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fmul_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmul_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmul_s, TCG_CALL_NO_RWG,
|
||||
@ -1224,6 +1230,8 @@ DEF_HELPER_FLAGS_6(sve_fdiv_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fdiv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fmin_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmin_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG,
|
||||
@ -1231,6 +1239,8 @@ DEF_HELPER_FLAGS_6(sve_fmin_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fmin_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fmax_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmax_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
|
||||
@ -1238,6 +1248,8 @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmin_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
|
||||
@ -1245,6 +1257,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmax_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
|
||||
@ -1252,6 +1266,8 @@ DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fminnum_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
|
||||
@ -1259,6 +1275,8 @@ DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fminnum_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_fmaxnum_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmaxnum_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_fmaxnum_s, TCG_CALL_NO_RWG,
|
||||
@ -1523,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1530,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1537,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1544,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1551,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1558,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
@ -1565,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||
|
@ -728,16 +728,19 @@ DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_b16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
@ -820,6 +823,8 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
|
||||
|
@ -1052,9 +1052,11 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \
|
||||
### SVE FP Multiply-Add Indexed Group
|
||||
|
||||
# SVE floating-point multiply-add (indexed)
|
||||
FMLA_zzxz 01100100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=0
|
||||
FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1
|
||||
FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
|
||||
FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
|
||||
FMLS_zzxz 01100100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=0
|
||||
FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1
|
||||
FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
|
||||
FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
|
||||
@ -1062,6 +1064,7 @@ FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
|
||||
### SVE FP Multiply Indexed Group
|
||||
|
||||
# SVE floating-point multiply (indexed)
|
||||
FMUL_zzx 01100100 0. 1 ..... 001010 ..... ..... @rrx_3 esz=0
|
||||
FMUL_zzx 01100100 0. 1 ..... 001000 ..... ..... @rrx_3 esz=1
|
||||
FMUL_zzx 01100100 10 1 ..... 001000 ..... ..... @rrx_2 esz=2
|
||||
FMUL_zzx 01100100 11 1 ..... 001000 ..... ..... @rrx_1 esz=3
|
||||
@ -1342,7 +1345,7 @@ LD1_zprz 1100010 11 1. ..... 11. ... ..... ..... \
|
||||
|
||||
# LD1Q
|
||||
LD1_zprz 1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 \
|
||||
&rprr_gather_load u=0 ff=0 xs=2 esz=4 msz=4 scale=0
|
||||
&rprr_gather_load u=1 ff=0 xs=2 esz=4 msz=4 scale=0
|
||||
|
||||
# SVE 64-bit gather load (vector plus immediate)
|
||||
LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
|
||||
|
@ -4484,33 +4484,35 @@ static TYPE FUNC##_reduce(TYPE *data, float_status *status, uintptr_t n) \
|
||||
} \
|
||||
} \
|
||||
uint64_t helper_sve_##NAME##v_##SUF(void *vn, void *vg, \
|
||||
float_status *s, uint32_t desc) \
|
||||
float_status *status, uint32_t desc) \
|
||||
{ \
|
||||
uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
|
||||
TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
|
||||
TYPE ident = IDENT; \
|
||||
for (i = 0; i < oprsz; ) { \
|
||||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
|
||||
do { \
|
||||
TYPE nn = *(TYPE *)(vn + H(i)); \
|
||||
*(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \
|
||||
*(TYPE *)((void *)data + i) = (pg & 1 ? nn : ident); \
|
||||
i += sizeof(TYPE), pg >>= sizeof(TYPE); \
|
||||
} while (i & 15); \
|
||||
} \
|
||||
for (; i < maxsz; i += sizeof(TYPE)) { \
|
||||
*(TYPE *)((void *)data + i) = IDENT; \
|
||||
*(TYPE *)((void *)data + i) = ident; \
|
||||
} \
|
||||
return FUNC##_reduce(data, s, maxsz / sizeof(TYPE)); \
|
||||
return FUNC##_reduce(data, status, maxsz / sizeof(TYPE)); \
|
||||
} \
|
||||
void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \
|
||||
float_status *status, uint32_t desc) \
|
||||
{ \
|
||||
unsigned oprsz = simd_oprsz(desc), segments = oprsz / 16; \
|
||||
TYPE ident = IDENT; \
|
||||
for (unsigned e = 0; e < 16; e += sizeof(TYPE)) { \
|
||||
TYPE data[ARM_MAX_VQ]; \
|
||||
for (unsigned s = 0; s < segments; s++) { \
|
||||
uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \
|
||||
TYPE nn = *(TYPE *)(vn + H(s * 16 + H(e))); \
|
||||
data[s] = (pg >> e) & 1 ? nn : IDENT; \
|
||||
TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \
|
||||
data[s] = (pg >> e) & 1 ? nn : ident; \
|
||||
} \
|
||||
*(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \
|
||||
} \
|
||||
@ -4521,14 +4523,17 @@ DO_REDUCE(fadd,h, float16, H1_2, float16_add, float16_zero)
|
||||
DO_REDUCE(fadd,s, float32, H1_4, float32_add, float32_zero)
|
||||
DO_REDUCE(fadd,d, float64, H1_8, float64_add, float64_zero)
|
||||
|
||||
/* Identity is floatN_default_nan, without the function call. */
|
||||
DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, 0x7E00)
|
||||
DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, 0x7FC00000)
|
||||
DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
|
||||
/*
|
||||
* We can't avoid the function call for the default NaN value, because
|
||||
* it changes when FPCR.AH is set.
|
||||
*/
|
||||
DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, float16_default_nan(status))
|
||||
DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, float32_default_nan(status))
|
||||
DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, float64_default_nan(status))
|
||||
|
||||
DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, 0x7E00)
|
||||
DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, 0x7FC00000)
|
||||
DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
|
||||
DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, float16_default_nan(status))
|
||||
DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, float32_default_nan(status))
|
||||
DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, float64_default_nan(status))
|
||||
|
||||
DO_REDUCE(fmin,h, float16, H1_2, float16_min, float16_infinity)
|
||||
DO_REDUCE(fmin,s, float32, H1_4, float32_min, float32_infinity)
|
||||
@ -4629,14 +4634,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
|
||||
} while (i != 0); \
|
||||
}
|
||||
|
||||
DO_ZPZZ_FP(sve_fadd_b16, uint16_t, H1_2, bfloat16_add)
|
||||
DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add)
|
||||
DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add)
|
||||
DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add)
|
||||
|
||||
DO_ZPZZ_FP(sve_fsub_b16, uint16_t, H1_2, bfloat16_sub)
|
||||
DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub)
|
||||
DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub)
|
||||
DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub)
|
||||
|
||||
DO_ZPZZ_FP(sve_fmul_b16, uint16_t, H1_2, bfloat16_mul)
|
||||
DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul)
|
||||
DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul)
|
||||
DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul)
|
||||
@ -4645,26 +4653,32 @@ DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div)
|
||||
DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div)
|
||||
DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div)
|
||||
|
||||
DO_ZPZZ_FP(sve_fmin_b16, uint16_t, H1_2, bfloat16_min)
|
||||
DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min)
|
||||
DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min)
|
||||
DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min)
|
||||
|
||||
DO_ZPZZ_FP(sve_fmax_b16, uint16_t, H1_2, bfloat16_max)
|
||||
DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
|
||||
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
|
||||
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
|
||||
|
||||
DO_ZPZZ_FP(sve_ah_fmin_b16, uint16_t, H1_2, helper_sme2_ah_fmin_b16)
|
||||
DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
|
||||
DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
|
||||
DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
|
||||
|
||||
DO_ZPZZ_FP(sve_ah_fmax_b16, uint16_t, H1_2, helper_sme2_ah_fmax_b16)
|
||||
DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
|
||||
DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
|
||||
DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
|
||||
|
||||
DO_ZPZZ_FP(sve_fminnum_b16, uint16_t, H1_2, bfloat16_minnum)
|
||||
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
|
||||
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
|
||||
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
|
||||
|
||||
DO_ZPZZ_FP(sve_fmaxnum_b16, uint16_t, H1_2, bfloat16_maxnum)
|
||||
DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum)
|
||||
DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum)
|
||||
DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum)
|
||||
@ -5090,6 +5104,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
|
||||
|
||||
#undef DO_ZPZ_FP
|
||||
|
||||
/*
 * Common worker for the predicated BFloat16 multiply-add helpers.
 *
 * Walks the vector from the top byte offset (simd_oprsz(desc)) down to 0
 * in 2-byte steps.  For every element whose predicate bit is set it stores
 *     bfloat16_muladd(vn[i] ^ neg1, vm[i], va[i] ^ neg3, flags, status)
 * into vd[i]; elements whose predicate bit is clear are not written.
 *
 * @vd:     destination vector
 * @vn, @vm, @va: the three source vectors passed to bfloat16_muladd
 * @vg:     predicate, read as an array of 64-bit words
 * @status: floating-point status passed through to bfloat16_muladd
 * @desc:   descriptor; only simd_oprsz(desc) is used here
 * @neg1:   value XORed into each vn element (callers pass 0 or 0x8000)
 * @neg3:   value XORed into each va element (callers pass 0 or 0x8000)
 * @flags:  flags argument passed through to bfloat16_muladd
 *
 * NOTE(review): the do/while form dereferences g[(i - 1) >> 6] before
 * testing i, so it assumes simd_oprsz(desc) is non-zero -- confirm.
 */
static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg,
|
||||
float_status *status, uint32_t desc,
|
||||
uint16_t neg1, uint16_t neg3, int flags)
|
||||
{
|
||||
intptr_t i = simd_oprsz(desc);
|
||||
uint64_t *g = vg;
|
||||
|
||||
do {
|
||||
/* Fetch the 64-bit predicate word that covers byte offset i - 1. */
uint64_t pg = g[(i - 1) >> 6];
|
||||
do {
|
||||
i -= 2;
|
||||
/* The predicate bit is indexed by the element's byte offset. */
if (likely((pg >> (i & 63)) & 1)) {
|
||||
/*
 * NOTE(review): the locals are declared float16 although they
 * are handed to bfloat16_muladd; presumably both are 16-bit
 * integer typedefs so this is harmless -- confirm.
 */
float16 e1, e2, e3, r;
|
||||
|
||||
/* XOR with 0x8000 toggles bit 15 of the 16-bit operand. */
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
|
||||
e2 = *(uint16_t *)(vm + H1_2(i));
|
||||
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
|
||||
r = bfloat16_muladd(e1, e2, e3, flags, status);
|
||||
*(uint16_t *)(vd + H1_2(i)) = r;
|
||||
}
|
||||
/* Stay on this predicate word until i reaches a multiple of 64. */
} while (i & 63);
|
||||
} while (i != 0);
|
||||
}
|
||||
|
||||
/*
 * BFloat16 predicated multiply-add helper.  Forwards to
 * do_fmla_zpzzz_b16 with neg1 = 0, neg3 = 0 and flags = 0, so no operand
 * bits are toggled and no muladd flags are requested.
 */
void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
|
||||
}
|
||||
|
||||
/*
 * BFloat16 predicated multiply-subtract helper.  Forwards to
 * do_fmla_zpzzz_b16 with neg1 = 0x8000, which toggles bit 15 of each vn
 * element before the muladd; neg3 and flags are 0.
 */
void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
|
||||
}
|
||||
|
||||
/*
 * BFloat16 predicated negated multiply-add helper.  Forwards to
 * do_fmla_zpzzz_b16 with neg1 = neg3 = 0x8000, which toggles bit 15 of
 * both the vn and the va element before the muladd; flags is 0.
 */
void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
|
||||
}
|
||||
|
||||
/*
 * BFloat16 predicated negated multiply-subtract helper.  Forwards to
 * do_fmla_zpzzz_b16 with neg3 = 0x8000, which toggles bit 15 of each va
 * element before the muladd; neg1 and flags are 0.
 */
void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
|
||||
}
|
||||
|
||||
/*
 * "ah" form of the BFloat16 predicated multiply-subtract helper.
 * neg1 and neg3 are 0, so the operand bits reach bfloat16_muladd
 * unchanged; the negation is requested through the
 * float_muladd_negate_product flag instead.
 * NOTE(review): presumably this is the FPCR.AH == 1 variant, where the
 * sign of a NaN input must not be flipped -- confirm.
 */
void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||
float_muladd_negate_product);
|
||||
}
|
||||
|
||||
/*
 * "ah" form of the BFloat16 predicated negated multiply-add helper.
 * neg1 and neg3 are 0, so the operand bits reach bfloat16_muladd
 * unchanged; negation of both the product and the addend is requested
 * through float_muladd_negate_product | float_muladd_negate_c.
 * NOTE(review): presumably this is the FPCR.AH == 1 variant -- confirm.
 */
void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||
float_muladd_negate_product | float_muladd_negate_c);
|
||||
}
|
||||
|
||||
/*
 * "ah" form of the BFloat16 predicated negated multiply-subtract helper.
 * neg1 and neg3 are 0, so the operand bits reach bfloat16_muladd
 * unchanged; negation of the addend is requested through the
 * float_muladd_negate_c flag.
 * NOTE(review): presumably this is the FPCR.AH == 1 variant -- confirm.
 */
void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
|
||||
void *vg, float_status *status, uint32_t desc)
|
||||
{
|
||||
do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||
float_muladd_negate_c);
|
||||
}
|
||||
|
||||
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
|
||||
float_status *status, uint32_t desc,
|
||||
uint16_t neg1, uint16_t neg3, int flags)
|
||||
|
@ -190,6 +190,10 @@ static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
|
||||
/*
 * Unpack an arg_rrr_esz and forward to gen_gvec_fpst_zzz.
 *
 * Returns false without calling gen_gvec_fpst_zzz when the element size
 * is MO_8 and the aa64_sve_b16b16 feature is absent; otherwise returns
 * whatever gen_gvec_fpst_zzz returns.
 */
static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
|
||||
arg_rrr_esz *a, int data)
|
||||
{
|
||||
/* These insns use MO_8 to encode BFloat16 */
|
||||
if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
|
||||
return false;
|
||||
}
|
||||
/* Only MO_16 selects FPST_A64_F16; MO_8 (BFloat16) uses FPST_A64. */
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
||||
}
|
||||
@ -403,6 +407,10 @@ static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||
/*
 * Unpack an arg_rprr_esz and forward to gen_gvec_fpst_zzzp with a data
 * value of 0.
 *
 * Returns false without calling gen_gvec_fpst_zzzp when the element size
 * is MO_8 and the aa64_sve_b16b16 feature is absent; otherwise returns
 * whatever gen_gvec_fpst_zzzp returns.
 */
static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
|
||||
arg_rprr_esz *a)
|
||||
{
|
||||
/* These insns use MO_8 to encode BFloat16. */
|
||||
if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
|
||||
return false;
|
||||
}
|
||||
/* Only MO_16 selects FPST_A64_F16; MO_8 (BFloat16) uses FPST_A64. */
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
||||
}
|
||||
@ -3875,31 +3883,38 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
|
||||
*** SVE Floating Point Multiply-Add Indexed Group
|
||||
*/
|
||||
|
||||
/*
 * Shared translator for the indexed multiply-add insns: unpacks an
 * arg_rrxr_esz and forwards to gen_gvec_fpst_zzzz, passing a->index as
 * the data value.
 *
 * Returns false without calling gen_gvec_fpst_zzzz when the element size
 * is MO_8 and the aa64_sve_b16b16 feature is absent; otherwise returns
 * whatever gen_gvec_fpst_zzzz returns.
 */
static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a,
|
||||
gen_helper_gvec_4_ptr *fn)
|
||||
{
|
||||
/* These insns use MO_8 to encode BFloat16 */
|
||||
if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
|
||||
return false;
|
||||
}
|
||||
/* Only MO_16 selects FPST_A64_F16; MO_8 (BFloat16) uses FPST_A64. */
return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
||||
}
|
||||
|
||||
static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
|
||||
NULL, gen_helper_gvec_fmla_idx_h,
|
||||
gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h,
|
||||
gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
|
||||
};
|
||||
TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||
fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz])
|
||||
|
||||
static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
|
||||
{ NULL, NULL },
|
||||
{ gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx },
|
||||
{ gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
|
||||
{ gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
|
||||
{ gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
|
||||
};
|
||||
TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||
fmls_idx_fns[a->esz][s->fpcr_ah],
|
||||
a->rd, a->rn, a->rm, a->ra, a->index,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a,
|
||||
fmls_idx_fns[a->esz][s->fpcr_ah])
|
||||
|
||||
/*
|
||||
*** SVE Floating Point Multiply Indexed Group
|
||||
*/
|
||||
|
||||
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
|
||||
NULL, gen_helper_gvec_fmul_idx_h,
|
||||
gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h,
|
||||
gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
|
||||
};
|
||||
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
|
||||
@ -4005,7 +4020,7 @@ static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = {
|
||||
gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d,
|
||||
};
|
||||
TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
|
||||
(s->fpcr_ah ? fmaxqv_fns : fmaxqv_ah_fns)[a->esz], a, 0,
|
||||
(s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
|
||||
static gen_helper_gvec_3_ptr * const fminqv_fns[4] = {
|
||||
@ -4017,7 +4032,7 @@ static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = {
|
||||
gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d,
|
||||
};
|
||||
TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
|
||||
(s->fpcr_ah ? fminqv_fns : fminqv_ah_fns)[a->esz], a, 0,
|
||||
(s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
|
||||
/*
|
||||
@ -4146,7 +4161,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
|
||||
|
||||
#define DO_FP3(NAME, name) \
|
||||
static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
|
||||
NULL, gen_helper_gvec_##name##_h, \
|
||||
gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \
|
||||
gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
|
||||
}; \
|
||||
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
|
||||
@ -4202,13 +4217,34 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
|
||||
s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
|
||||
name##_zpzz_fns[a->esz], a)
|
||||
|
||||
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
|
||||
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
|
||||
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
|
||||
DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
|
||||
DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
|
||||
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
|
||||
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
|
||||
/*
 * Similar, but for insns where sz == 0 encodes bfloat16.
 *
 * Declares name##_zpzz_fns[4], indexed by a->esz, holding the _b16, _h,
 * _s and _d helpers in that order, and a TRANS_FEAT for NAME gated on
 * FEAT that dispatches through gen_gvec_fpst_arg_zpzz.
 */
|
||||
#define DO_ZPZZ_FP_B16(NAME, FEAT, name) \
|
||||
static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
|
||||
gen_helper_##name##_b16, gen_helper_##name##_h, \
|
||||
gen_helper_##name##_s, gen_helper_##name##_d \
|
||||
}; \
|
||||
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
|
||||
|
||||
/*
 * Variant with a second helper table for the "ah" helpers, again with
 * slot 0 holding the _b16 helper.
 *
 * Declares name##_zpzz_fns[4] (from name) and name##_ah_zpzz_fns[4]
 * (from ah_name), each ordered _b16, _h, _s, _d and indexed by a->esz.
 * The generated TRANS_FEAT picks the "ah" table when s->fpcr_ah is set
 * and dispatches through gen_gvec_fpst_arg_zpzz.
 */
#define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \
|
||||
static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
|
||||
gen_helper_##name##_b16, gen_helper_##name##_h, \
|
||||
gen_helper_##name##_s, gen_helper_##name##_d \
|
||||
}; \
|
||||
static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
|
||||
gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \
|
||||
gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
|
||||
}; \
|
||||
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
|
||||
s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
|
||||
name##_zpzz_fns[a->esz], a)
|
||||
|
||||
DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd)
|
||||
DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub)
|
||||
DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul)
|
||||
DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
|
||||
DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
|
||||
DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum)
|
||||
DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
|
||||
DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
|
||||
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
|
||||
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
|
||||
@ -4339,19 +4375,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
|
||||
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
|
||||
/*
 * Shared translator for the predicated multiply-add insns: unpacks an
 * arg_rprrr_esz and forwards to gen_gvec_fpst_zzzzp with a data value
 * of 0.
 *
 * Returns false without calling gen_gvec_fpst_zzzzp when the element size
 * is MO_8 and the aa64_sve_b16b16 feature is absent; otherwise returns
 * whatever gen_gvec_fpst_zzzzp returns.
 */
static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a,
|
||||
gen_helper_gvec_5_ptr *fn)
|
||||
{
|
||||
/* These insns use MO_8 to encode BFloat16 */
|
||||
if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
|
||||
return false;
|
||||
}
|
||||
/* Only MO_16 selects FPST_A64_F16; MO_8 (BFloat16) uses FPST_A64. */
return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0,
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
||||
}
|
||||
|
||||
#define DO_FMLA(NAME, name, ah_name) \
|
||||
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
|
||||
NULL, gen_helper_sve_##name##_h, \
|
||||
gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \
|
||||
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
|
||||
}; \
|
||||
static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
|
||||
NULL, gen_helper_sve_##ah_name##_h, \
|
||||
gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \
|
||||
gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
|
||||
}; \
|
||||
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
|
||||
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
|
||||
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
|
||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||
TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \
|
||||
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
|
||||
|
||||
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
|
||||
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
|
||||
|
@ -1467,16 +1467,19 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
||||
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
||||
}
|
||||
|
||||
/*
 * Addition helpers, one DO_3OP instantiation per element type.
 * NOTE(review): gvec_fadd_b16 and gvec_bfadd both use bfloat16_add and
 * differ only in the third macro argument (float16 vs bfloat16); whether
 * that makes them behave differently depends on the DO_3OP definition --
 * confirm, and consider whether both are needed.
 */
DO_3OP(gvec_fadd_b16, bfloat16_add, float16)
|
||||
DO_3OP(gvec_fadd_h, float16_add, float16)
|
||||
DO_3OP(gvec_fadd_s, float32_add, float32)
|
||||
DO_3OP(gvec_fadd_d, float64_add, float64)
|
||||
DO_3OP(gvec_bfadd, bfloat16_add, bfloat16)
|
||||
|
||||
/*
 * Subtraction helpers; gvec_fsub_b16 and gvec_bfsub stand in the same
 * relation as the two bfloat16 addition helpers (same function, third
 * argument float16 vs bfloat16).
 */
DO_3OP(gvec_fsub_b16, bfloat16_sub, float16)
|
||||
DO_3OP(gvec_fsub_h, float16_sub, float16)
|
||||
DO_3OP(gvec_fsub_s, float32_sub, float32)
|
||||
DO_3OP(gvec_fsub_d, float64_sub, float64)
|
||||
DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16)
|
||||
|
||||
/* Multiplication helpers; the _b16 entry uses bfloat16_mul. */
DO_3OP(gvec_fmul_b16, bfloat16_mul, float16)
|
||||
DO_3OP(gvec_fmul_h, float16_mul, float16)
|
||||
DO_3OP(gvec_fmul_s, float32_mul, float32)
|
||||
DO_3OP(gvec_fmul_d, float64_mul, float64)
|
||||
@ -1782,6 +1785,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
||||
|
||||
/* Expands to its second argument; N and S are ignored. */
#define nop(N, M, S) (M)
|
||||
|
||||
/*
 * Indexed multiply helpers, one DO_FMUL_IDX instantiation per element
 * type, each passing nop as the second macro argument.  The _b16 entry
 * shares float16 and H2 with the _h entry but multiplies with
 * bfloat16_mul.
 */
DO_FMUL_IDX(gvec_fmul_idx_b16, nop, bfloat16_mul, float16, H2)
|
||||
DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2)
|
||||
DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4)
|
||||
DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8)
|
||||
|
Loading…
Reference in New Issue
Block a user