mirror of
https://github.com/qemu/qemu.git
synced 2025-08-15 22:31:15 +00:00
target-arm queue:
* Implement emulation of SME2p1 and SVE2p1 * Correctly enforce alignment checks for v8M loads and stores done via helper functions * Mark the "highbank" and the "midway" machine as deprecated -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmhoABMZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3n5CD/9esli7dCvutRUv0YCDR0ca HyFgZT5Z+rnjdUgIBWk3qPIdmQ+dCvK8gci8Du8mY7WWPvJFc+x2wE9b0trxaARZ ckjPo/dPq18FPRqppbNo5LGeBImwVqMYioJtuLIDw6vdMlm6eYvyyJWoFo6pXXPY 3FlW0vBWZ78/KlQ8dYVK8TQryT2qswjXqvhz96/wCFQWRyWCXNosgETGQQH2z/20 y5qAMkmI3NATaSSnkVox88RipFSnqotKSpczG5MBXs/n4hZvMHHNfrNxgZ17lygP WI4R5j/M3cRHnglRzxVm5xzz0Vy8gWV+Zn97YMN2syJhze2nFQDcD6dWGNEYdCgT R83/FF2yVn7v4ZompmyL97eUtfiFR/t40M+ojdhrfwADNelAU0JbeLahJuJjXfBm ptdiTnDXYD8Ts6X+FTCafWO9ciPmPJ+SyXOcDnRpy8NpNstL6e7Um5BU8Tcw41nV cAP5K5LooQO6yDkrVf2sjFCU9QxamPhCck+xQsT85njy3br3OA2MTGA/ZdD5noet i2EIcdovQjMZqRv/P8c/+WzDhUw27fPbMzLOvl+nUHQM29Mx7hdTvbdvj/CiQtpV wXprWqdG6jeAXeIkhwFs6/8Uc+7mn3guPi8RQZ5uwX5e1pYNSVOKMjGpooVekNbL qjb+ZLPXIpkCV3N5Vbg9Uw== =onnF -----END PGP SIGNATURE----- Merge tag 'pull-target-arm-20250704' of https://gitlab.com/pm215/qemu into staging target-arm queue: * Implement emulation of SME2p1 and SVE2p1 * Correctly enforce alignment checks for v8M loads and stores done via helper functions * Mark the "highbank" and the "midway" machine as deprecated # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmhoABMZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3n5CD/9esli7dCvutRUv0YCDR0ca # HyFgZT5Z+rnjdUgIBWk3qPIdmQ+dCvK8gci8Du8mY7WWPvJFc+x2wE9b0trxaARZ # ckjPo/dPq18FPRqppbNo5LGeBImwVqMYioJtuLIDw6vdMlm6eYvyyJWoFo6pXXPY # 3FlW0vBWZ78/KlQ8dYVK8TQryT2qswjXqvhz96/wCFQWRyWCXNosgETGQQH2z/20 # y5qAMkmI3NATaSSnkVox88RipFSnqotKSpczG5MBXs/n4hZvMHHNfrNxgZ17lygP # WI4R5j/M3cRHnglRzxVm5xzz0Vy8gWV+Zn97YMN2syJhze2nFQDcD6dWGNEYdCgT # R83/FF2yVn7v4ZompmyL97eUtfiFR/t40M+ojdhrfwADNelAU0JbeLahJuJjXfBm # ptdiTnDXYD8Ts6X+FTCafWO9ciPmPJ+SyXOcDnRpy8NpNstL6e7Um5BU8Tcw41nV # 
cAP5K5LooQO6yDkrVf2sjFCU9QxamPhCck+xQsT85njy3br3OA2MTGA/ZdD5noet # i2EIcdovQjMZqRv/P8c/+WzDhUw27fPbMzLOvl+nUHQM29Mx7hdTvbdvj/CiQtpV # wXprWqdG6jeAXeIkhwFs6/8Uc+7mn3guPi8RQZ5uwX5e1pYNSVOKMjGpooVekNbL # qjb+ZLPXIpkCV3N5Vbg9Uw== # =onnF # -----END PGP SIGNATURE----- # gpg: Signature made Fri 04 Jul 2025 12:23:47 EDT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * tag 'pull-target-arm-20250704' of https://gitlab.com/pm215/qemu: (119 commits) linux-user/aarch64: Set hwcap bits for SME2p1/SVE2p1 target/arm: Enable FEAT_SME2p1 on -cpu max target/arm: Implement SME2 BFMOPA (non-widening) target/arm: Implement FMOPA (non-widening) for fp16 target/arm: Support FPCR.AH in SME FMOPS, BFMOPS target/arm: Rename BFMOPA to BFMOPA_w target/arm: Rename FMOPA_h to FMOPA_w_h target/arm: Implement LUTI2, LUTI4 for SME2/SME2p1 target/arm: Implement MOVAZ for SME2p1 target/arm: Implement LD1Q, ST1Q for SVE2p1 target/arm: Implement {LD, ST}[234]Q for SME2p1/SVE2p1 target/arm: Move ld1qq and st1qq primitives to sve_ldst_internal.h target/arm: Implement {LD1, ST1}{W, D} (128-bit element) for SVE2p1 target/arm: Split the ST_zpri and ST_zprr patterns target/arm: Implement SME2 counted predicate register load/store target/arm: Implement TBLQ, TBXQ for SME2p1/SVE2p1 target/arm: Implement ZIPQ, UZPQ for SME2p1/SVE2p1 target/arm: Implement PMOV for SME2p1/SVE2p1 target/arm: Implement EXTQ for SME2p1/SVE2p1 target/arm: Implement DUPQ for SME2p1/SVE2p1 ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
df6fe2abf2
@ -352,6 +352,13 @@ they want to use and avoids confusion. Existing users of the ``spike``
|
||||
machine must ensure that they're setting the ``spike`` machine in the
|
||||
command line (``-M spike``).
|
||||
|
||||
Arm ``highbank`` and ``midway`` machines (since 10.1)
|
||||
'''''''''''''''''''''''''''''''''''''''''''''''''''''
|
||||
|
||||
There are no known users left for these machines (if you still use them,
|
||||
please send an email to the qemu-devel mailing list). If you just want to
|
||||
boot a Cortex-A15 or Cortex-A9 Linux, use the ``virt`` machine instead.
|
||||
|
||||
|
||||
System emulator binaries
|
||||
------------------------
|
||||
|
@ -129,16 +129,22 @@ the following architecture extensions:
|
||||
- FEAT_SM3 (Advanced SIMD SM3 instructions)
|
||||
- FEAT_SM4 (Advanced SIMD SM4 instructions)
|
||||
- FEAT_SME (Scalable Matrix Extension)
|
||||
- FEAT_SME2 (Scalable Matrix Extension version 2)
|
||||
- FEAT_SME2p1 (Scalable Matrix Extension version 2.1)
|
||||
- FEAT_SME_B16B16 (Non-widening BFloat16 arithmetic for SME2)
|
||||
- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
|
||||
- FEAT_SME_F16F16 (Non-widening half-precision FP16 arithmetic for SME2)
|
||||
- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
|
||||
- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
|
||||
- FEAT_SVE (Scalable Vector Extension)
|
||||
- FEAT_SVE_AES (Scalable Vector AES instructions)
|
||||
- FEAT_SVE_B16B16 (Non-widening BFloat16 arithmetic for SVE2)
|
||||
- FEAT_SVE_BitPerm (Scalable Vector Bit Permutes instructions)
|
||||
- FEAT_SVE_PMULL128 (Scalable Vector PMULL instructions)
|
||||
- FEAT_SVE_SHA3 (Scalable Vector SHA3 instructions)
|
||||
- FEAT_SVE_SM4 (Scalable Vector SM4 instructions)
|
||||
- FEAT_SVE2 (Scalable Vector Extension version 2)
|
||||
- FEAT_SVE2p1 (Scalable Vector Extension version 2.1)
|
||||
- FEAT_SPECRES (Speculation restriction instructions)
|
||||
- FEAT_SSBS (Speculative Store Bypass Safe)
|
||||
- FEAT_SSBS2 (MRS and MSR instructions for SSBS version 2)
|
||||
|
@ -357,6 +357,7 @@ static void highbank_class_init(ObjectClass *oc, const void *data)
|
||||
mc->max_cpus = 4;
|
||||
mc->ignore_memory_transaction_failures = true;
|
||||
mc->default_ram_id = "highbank.dram";
|
||||
mc->deprecation_reason = "no known users left for this machine";
|
||||
}
|
||||
|
||||
static const TypeInfo highbank_type = {
|
||||
@ -381,6 +382,7 @@ static void midway_class_init(ObjectClass *oc, const void *data)
|
||||
mc->max_cpus = 4;
|
||||
mc->ignore_memory_transaction_failures = true;
|
||||
mc->default_ram_id = "highbank.dram";
|
||||
mc->deprecation_reason = "no known users left for this machine";
|
||||
}
|
||||
|
||||
static const TypeInfo midway_type = {
|
||||
|
@ -248,7 +248,7 @@ static void target_setup_za_record(struct target_za_context *za,
|
||||
for (i = 0; i < vl; ++i) {
|
||||
uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
|
||||
for (j = 0; j < vq * 2; ++j) {
|
||||
__put_user_e(env->zarray[i].d[j], z + j, le);
|
||||
__put_user_e(env->za_state.za[i].d[j], z + j, le);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -397,7 +397,7 @@ static bool target_restore_za_record(CPUARMState *env,
|
||||
for (i = 0; i < vl; ++i) {
|
||||
uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
|
||||
for (j = 0; j < vq * 2; ++j) {
|
||||
__get_user_e(env->zarray[i].d[j], z + j, le);
|
||||
__get_user_e(env->za_state.za[i].d[j], z + j, le);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -915,6 +915,14 @@ uint64_t get_elf_hwcap2(void)
|
||||
GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
|
||||
GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC);
|
||||
GET_FEATURE_ID(aa64_mops, ARM_HWCAP2_A64_MOPS);
|
||||
GET_FEATURE_ID(aa64_sve2p1, ARM_HWCAP2_A64_SVE2P1);
|
||||
GET_FEATURE_ID(aa64_sme2, (ARM_HWCAP2_A64_SME2 |
|
||||
ARM_HWCAP2_A64_SME_I16I32 |
|
||||
ARM_HWCAP2_A64_SME_BI32I32));
|
||||
GET_FEATURE_ID(aa64_sme2p1, ARM_HWCAP2_A64_SME2P1);
|
||||
GET_FEATURE_ID(aa64_sme_b16b16, ARM_HWCAP2_A64_SME_B16B16);
|
||||
GET_FEATURE_ID(aa64_sme_f16f16, ARM_HWCAP2_A64_SME_F16F16);
|
||||
GET_FEATURE_ID(aa64_sve_b16b16, ARM_HWCAP2_A64_SVE_B16B16);
|
||||
|
||||
return hwcaps;
|
||||
}
|
||||
|
@ -604,6 +604,11 @@ static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ISAR2, RPRES);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_lut(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ISAR2, LUT);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
|
||||
{
|
||||
/* We always set the AdvSIMD and FP fields identically. */
|
||||
@ -932,6 +937,11 @@ static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id)
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) != 0;
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sve2p1(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, SVEVER) >=2;
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, AES) != 0;
|
||||
@ -977,6 +987,21 @@ static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id)
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, F64MM) != 0;
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sve_b16b16(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64ZFR0, B16B16);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sme_b16b16(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, B16B16);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sme_f16f16(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F16F16);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, F64F64);
|
||||
@ -992,6 +1017,44 @@ static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id)
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, FA64);
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sme2(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) != 0;
|
||||
}
|
||||
|
||||
static inline bool isar_feature_aa64_sme2p1(const ARMISARegisters *id)
|
||||
{
|
||||
return FIELD_EX64_IDREG(id, ID_AA64SMFR0, SMEVER) >= 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Combinations of feature tests, for ease of use with TRANS_FEAT.
|
||||
*/
|
||||
/* Combined test: true if either the SME or the SVE2p1 test passes. */
static inline bool isar_feature_aa64_sme_or_sve2p1(const ARMISARegisters *id)
{
    if (isar_feature_aa64_sme(id)) {
        return true;
    }
    return isar_feature_aa64_sve2p1(id);
}
|
||||
|
||||
/* Combined test: true if either the SME2 or the SVE2p1 test passes. */
static inline bool isar_feature_aa64_sme2_or_sve2p1(const ARMISARegisters *id)
{
    if (isar_feature_aa64_sme2(id)) {
        return true;
    }
    return isar_feature_aa64_sve2p1(id);
}
|
||||
|
||||
/* Combined test: true if either the SME2p1 or the SVE2p1 test passes. */
static inline bool isar_feature_aa64_sme2p1_or_sve2p1(const ARMISARegisters *id)
{
    if (isar_feature_aa64_sme2p1(id)) {
        return true;
    }
    return isar_feature_aa64_sve2p1(id);
}
|
||||
|
||||
/* Combined test: requires both the SME2 and the SME_I16I64 tests to pass. */
static inline bool isar_feature_aa64_sme2_i16i64(const ARMISARegisters *id)
{
    if (!isar_feature_aa64_sme2(id)) {
        return false;
    }
    return isar_feature_aa64_sme_i16i64(id);
}
|
||||
|
||||
/* Combined test: requires both the SME2 and the SME_F64F64 tests to pass. */
static inline bool isar_feature_aa64_sme2_f64f64(const ARMISARegisters *id)
{
    if (!isar_feature_aa64_sme2(id)) {
        return false;
    }
    return isar_feature_aa64_sme_f64f64(id);
}
|
||||
|
||||
/*
|
||||
* Feature tests for "does this exist in either 32-bit or 64-bit?"
|
||||
*/
|
||||
|
@ -554,11 +554,15 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
|
||||
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
|
||||
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
|
||||
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
|
||||
set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA]);
|
||||
set_default_nan_mode(1, &env->vfp.fp_status[FPST_ZA_F16]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
|
||||
set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
|
||||
@ -631,6 +635,9 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el)
|
||||
env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK;
|
||||
env->cp15.scr_el3 |= SCR_ENTP2;
|
||||
env->vfp.smcr_el[3] = 0xf;
|
||||
if (cpu_isar_feature(aa64_sme2, cpu)) {
|
||||
env->vfp.smcr_el[3] |= R_SMCR_EZT0_MASK;
|
||||
}
|
||||
}
|
||||
if (cpu_isar_feature(aa64_hcx, cpu)) {
|
||||
env->cp15.scr_el3 |= SCR_HXEN;
|
||||
@ -1331,8 +1338,8 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
|
||||
qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i);
|
||||
for (j = zcr_len; j >= 0; --j) {
|
||||
qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c",
|
||||
env->zarray[i].d[2 * j + 1],
|
||||
env->zarray[i].d[2 * j],
|
||||
env->za_state.za[i].d[2 * j + 1],
|
||||
env->za_state.za[i].d[2 * j],
|
||||
j ? ':' : '\n');
|
||||
}
|
||||
}
|
||||
|
@ -207,6 +207,8 @@ typedef struct NVICState NVICState;
|
||||
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
|
||||
* and the reciprocal and square root estimate/step insns);
|
||||
* for half-precision
|
||||
* ZA: the "streaming sve" fp status.
|
||||
* ZA_F16: likewise for half-precision.
|
||||
*
|
||||
* Half-precision operations are governed by a separate
|
||||
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
|
||||
@ -227,6 +229,12 @@ typedef struct NVICState NVICState;
|
||||
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
|
||||
* which means we need an FPST_AH_F16 as well.
|
||||
*
|
||||
* The "ZA" float_status are for Streaming SVE operations which use
|
||||
* default-NaN and do not generate fp exceptions, which means that they
|
||||
* do not accumulate exception bits back into FPCR.
|
||||
* See e.g. FPAdd vs FPAdd_ZA pseudocode functions, and the setting
|
||||
* of fpcr.DN and fpexec parameters.
|
||||
*
|
||||
* To avoid having to transfer exception bits around, we simply
|
||||
* say that the FPSCR cumulative exception flags are the logical
|
||||
* OR of the flags in the four fp statuses. This relies on the
|
||||
@ -240,10 +248,12 @@ typedef enum ARMFPStatusFlavour {
|
||||
FPST_A64_F16,
|
||||
FPST_AH,
|
||||
FPST_AH_F16,
|
||||
FPST_ZA,
|
||||
FPST_ZA_F16,
|
||||
FPST_STD,
|
||||
FPST_STD_F16,
|
||||
} ARMFPStatusFlavour;
|
||||
#define FPST_COUNT 8
|
||||
#define FPST_COUNT 10
|
||||
|
||||
typedef struct CPUArchState {
|
||||
/* Regs for current mode. */
|
||||
@ -669,9 +679,6 @@ typedef struct CPUArchState {
|
||||
|
||||
uint32_t xregs[16];
|
||||
|
||||
/* Scratch space for aa32 neon expansion. */
|
||||
uint32_t scratch[8];
|
||||
|
||||
/* There are a number of distinct float control structures. */
|
||||
float_status fp_status[FPST_COUNT];
|
||||
|
||||
@ -708,27 +715,36 @@ typedef struct CPUArchState {
|
||||
|
||||
uint64_t scxtnum_el[4];
|
||||
|
||||
/*
|
||||
* SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
|
||||
* as we do with vfp.zregs[]. This corresponds to the architectural ZA
|
||||
* array, where ZA[N] is in the least-significant bytes of env->zarray[N].
|
||||
* When SVL is less than the architectural maximum, the accessible
|
||||
* storage is restricted, such that if the SVL is X bytes the guest can
|
||||
* see only the bottom X elements of zarray[], and only the least
|
||||
* significant X bytes of each element of the array. (In other words,
|
||||
* the observable part is always square.)
|
||||
*
|
||||
* The ZA storage can also be considered as a set of square tiles of
|
||||
* elements of different sizes. The mapping from tiles to the ZA array
|
||||
* is architecturally defined, such that for tiles of elements of esz
|
||||
* bytes, the Nth row (or "horizontal slice") of tile T is in
|
||||
* ZA[T + N * esz]. Note that this means that each tile is not contiguous
|
||||
* in the ZA storage, because its rows are striped through the ZA array.
|
||||
*
|
||||
* Because this is so large, keep this toward the end of the reset area,
|
||||
* to keep the offsets into the rest of the structure smaller.
|
||||
*/
|
||||
ARMVectorReg zarray[ARM_MAX_VQ * 16];
|
||||
struct {
|
||||
/* SME2 ZT0 -- 512 bit array, with data ordered like ARMVectorReg. */
|
||||
uint64_t zt0[512 / 64] QEMU_ALIGNED(16);
|
||||
|
||||
/*
|
||||
* SME ZA storage -- 256 x 256 byte array, with bytes in host
|
||||
* word order, as we do with vfp.zregs[]. This corresponds to
|
||||
* the architectural ZA array, where ZA[N] is in the least
|
||||
* significant bytes of env->za_state.za[N].
|
||||
*
|
||||
* When SVL is less than the architectural maximum, the accessible
|
||||
* storage is restricted, such that if the SVL is X bytes the guest
|
||||
* can see only the bottom X elements of za[], and only the least
|
||||
* significant X bytes of each element of the array. (In other words,
|
||||
* the observable part is always square.)
|
||||
*
|
||||
* The ZA storage can also be considered as a set of square tiles of
|
||||
* elements of different sizes. The mapping from tiles to the ZA array
|
||||
* is architecturally defined, such that for tiles of elements of esz
|
||||
* bytes, the Nth row (or "horizontal slice") of tile T is in
|
||||
* ZA[T + N * esz]. Note that this means that each tile is not
|
||||
* contiguous in the ZA storage, because its rows are striped through
|
||||
* the ZA array.
|
||||
*
|
||||
* Because this is so large, keep this toward the end of the
|
||||
* reset area, to keep the offsets into the rest of the structure
|
||||
* smaller.
|
||||
*/
|
||||
ARMVectorReg za[ARM_MAX_VQ * 16];
|
||||
} za_state;
|
||||
|
||||
struct CPUBreakpoint *cpu_breakpoint[16];
|
||||
struct CPUWatchpoint *cpu_watchpoint[16];
|
||||
@ -1120,6 +1136,7 @@ struct ArchCPU {
|
||||
|
||||
/* Used to set the maximum vector length the cpu will support. */
|
||||
uint32_t sve_max_vq;
|
||||
uint32_t sme_max_vq;
|
||||
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
/* Used to set the default vector length at process start. */
|
||||
@ -1497,6 +1514,7 @@ FIELD(SVCR, ZA, 1, 1)
|
||||
|
||||
/* Fields for SMCR_ELx. */
|
||||
FIELD(SMCR, LEN, 0, 4)
|
||||
FIELD(SMCR, EZT0, 30, 1)
|
||||
FIELD(SMCR, FA64, 31, 1)
|
||||
|
||||
/* Write a new value to v7m.exception, thus transitioning into or out
|
||||
@ -2198,6 +2216,7 @@ FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
|
||||
FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
|
||||
FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
|
||||
FIELD(ID_AA64ISAR2, CSSC, 52, 4)
|
||||
FIELD(ID_AA64ISAR2, LUT, 56, 4)
|
||||
FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
|
||||
|
||||
FIELD(ID_AA64PFR0, EL0, 0, 4)
|
||||
@ -3067,6 +3086,7 @@ FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
|
||||
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
|
||||
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
|
||||
FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
|
||||
FIELD(TBFLAG_A64, ZT0EXC_EL, 39, 2)
|
||||
|
||||
/*
|
||||
* Helpers for using the above. Note that only the A64 accessors use
|
||||
|
@ -259,6 +259,13 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
|
||||
/* From now on sve_max_vq is the actual maximum supported length. */
|
||||
cpu->sve_max_vq = max_vq;
|
||||
cpu->sve_vq.map = vq_map;
|
||||
|
||||
/* FEAT_F64MM requires the existence of a 256-bit vector size. */
|
||||
if (max_vq < 2) {
|
||||
uint64_t t = GET_IDREG(&cpu->isar, ID_AA64ZFR0);
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 0);
|
||||
SET_IDREG(&cpu->isar, ID_AA64ZFR0, t);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -356,6 +363,7 @@ void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp)
|
||||
}
|
||||
|
||||
cpu->sme_vq.map = vq_map;
|
||||
cpu->sme_max_vq = 32 - clz32(vq_map);
|
||||
}
|
||||
|
||||
static bool cpu_arm_get_sme(Object *obj, Error **errp)
|
||||
|
@ -6663,7 +6663,7 @@ void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
|
||||
* when disabled either.
|
||||
*/
|
||||
if (change & new & R_SVCR_ZA_MASK) {
|
||||
memset(env->zarray, 0, sizeof(env->zarray));
|
||||
memset(&env->za_state, 0, sizeof(env->za_state));
|
||||
}
|
||||
|
||||
if (tcg_enabled()) {
|
||||
@ -6682,10 +6682,14 @@ static void smcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
|
||||
{
|
||||
int cur_el = arm_current_el(env);
|
||||
int old_len = sve_vqm1_for_el(env, cur_el);
|
||||
uint64_t valid_mask = R_SMCR_LEN_MASK | R_SMCR_FA64_MASK;
|
||||
int new_len;
|
||||
|
||||
QEMU_BUILD_BUG_ON(ARM_MAX_VQ > R_SMCR_LEN_MASK + 1);
|
||||
value &= R_SMCR_LEN_MASK | R_SMCR_FA64_MASK;
|
||||
if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) {
|
||||
valid_mask |= R_SMCR_EZT0_MASK;
|
||||
}
|
||||
value &= valid_mask;
|
||||
raw_write(env, ri, value);
|
||||
|
||||
/*
|
||||
|
@ -315,12 +315,31 @@ static const VMStateDescription vmstate_za = {
|
||||
.minimum_version_id = 1,
|
||||
.needed = za_needed,
|
||||
.fields = (const VMStateField[]) {
|
||||
VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0,
|
||||
VMSTATE_STRUCT_ARRAY(env.za_state.za, ARMCPU, ARM_MAX_VQ * 16, 0,
|
||||
vmstate_vreg, ARMVectorReg),
|
||||
VMSTATE_END_OF_LIST()
|
||||
}
|
||||
};
|
||||
|
||||
static bool zt0_needed(void *opaque)
|
||||
{
|
||||
ARMCPU *cpu = opaque;
|
||||
|
||||
return za_needed(cpu) && cpu_isar_feature(aa64_sme2, cpu);
|
||||
}
|
||||
|
||||
static const VMStateDescription vmstate_zt0 = {
|
||||
.name = "cpu/zt0",
|
||||
.version_id = 1,
|
||||
.minimum_version_id = 1,
|
||||
.needed = zt0_needed,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_UINT64_ARRAY(env.za_state.zt0, ARMCPU,
|
||||
ARRAY_SIZE(((CPUARMState *)0)->za_state.zt0)),
|
||||
VMSTATE_END_OF_LIST()
|
||||
}
|
||||
};
|
||||
|
||||
static bool serror_needed(void *opaque)
|
||||
{
|
||||
ARMCPU *cpu = opaque;
|
||||
@ -1096,6 +1115,7 @@ const VMStateDescription vmstate_arm_cpu = {
|
||||
&vmstate_m_security,
|
||||
&vmstate_sve,
|
||||
&vmstate_za,
|
||||
&vmstate_zt0,
|
||||
&vmstate_serror,
|
||||
&vmstate_irq_line_state,
|
||||
&vmstate_wfxt_timer,
|
||||
|
@ -80,6 +80,7 @@ typedef enum {
|
||||
SME_ET_Streaming,
|
||||
SME_ET_NotStreaming,
|
||||
SME_ET_InactiveZA,
|
||||
SME_ET_InaccessibleZT0,
|
||||
} SMEExceptionType;
|
||||
|
||||
#define ARM_EL_EC_LENGTH 6
|
||||
|
@ -1201,7 +1201,7 @@ void aarch64_max_tcg_initfn(Object *obj)
|
||||
*/
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, SME, 2); /* FEAT_SME2 */
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */
|
||||
t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */
|
||||
SET_IDREG(isar, ID_AA64PFR1, t);
|
||||
@ -1250,10 +1250,11 @@ void aarch64_max_tcg_initfn(Object *obj)
|
||||
FIELD_DP64_IDREG(isar, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */
|
||||
|
||||
t = GET_IDREG(isar, ID_AA64ZFR0);
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 2); /* FEAT_SVE2p1 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 2); /* FEAT_BF16, FEAT_EBF16 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, B16B16, 1); /* FEAT_SVE_B16B16 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */
|
||||
t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */
|
||||
@ -1269,11 +1270,16 @@ void aarch64_max_tcg_initfn(Object *obj)
|
||||
|
||||
t = GET_IDREG(isar, ID_AA64SMFR0);
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, BI32I32, 1); /* FEAT_SME2 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, F16F16, 1); /* FEAT_SME_F16F16 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, B16B16, 1); /* FEAT_SME_B16B16 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, I16I32, 5); /* FEAT_SME2 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, SMEVER, 2); /* FEAT_SME2p1 */
|
||||
t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
|
||||
SET_IDREG(isar, ID_AA64SMFR0, t);
|
||||
|
||||
|
@ -369,3 +369,14 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz)
|
||||
{
|
||||
static gen_helper_gvec_3 * const fns[4] = {
|
||||
gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
|
||||
gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
|
||||
};
|
||||
tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
|
||||
}
|
||||
|
@ -402,6 +402,8 @@ AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
|
||||
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
|
||||
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
|
||||
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
|
||||
AH_MINMAX_HELPER(sme2_ah_fmax_b16, bfloat16, bfloat16, max)
|
||||
AH_MINMAX_HELPER(sme2_ah_fmin_b16, bfloat16, bfloat16, min)
|
||||
|
||||
/* 64-bit versions of the CRC helpers. Note that although the operation
|
||||
* (and the prototypes of crc32c() and crc32() mean that only the bottom
|
||||
|
@ -33,6 +33,21 @@ DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
@ -120,14 +135,45 @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_w_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_7(sme_bfmopa_w, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_bfmopa, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sme_fmops_w_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmops_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmops_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_fmops_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_bfmops_w, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_bfmops, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_7(sme_ah_fmops_w_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_ah_fmops_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_ah_fmops_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_ah_fmops_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_ah_bfmops_w, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_7(sme_ah_bfmops, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
|
||||
@ -144,3 +190,168 @@ DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sme2_bmopa_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(sme2_smopa2_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_6(sme2_umopa2_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmax_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmin_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_fmax_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_fmin_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmaxnum_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fminnum_b16, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sme2_fdot_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_6(sme2_fdot_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_6(sme2_fvdot_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_svdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_suvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_usvdot_idx_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_svdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_svdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uvdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme2_smlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_usmlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme2_smlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_smlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_sumlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_scvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_ucvtf, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvt_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtu_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtu_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvt_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtu_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvtn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvtn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqcvtun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_zip2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_zip2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_zip2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_zip2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_zip2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_uzp2_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uzp2_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uzp2_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uzp2_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uzp2_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_zip4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_zip4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_zip4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_zip4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_zip4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uzp4_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshr_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshru_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_sh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_uqrshrn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sme2_sqrshrun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_sclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_sclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_sclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_sclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_uclamp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_uclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme2_fclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_fclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_fclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_bfclamp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme2_sel_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_sel_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_sel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sme2_sel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
|
||||
|
@ -676,11 +676,21 @@ DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tblq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tblq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tblq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tblq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tbxq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tbxq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tbxq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_tbxq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
@ -701,12 +711,22 @@ DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_zipq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_zipq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_zipq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_zipq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uzpq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uzpq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uzpq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uzpq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
@ -937,10 +957,17 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_while2l, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(sve_while2g, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_whilecl, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(sve_whilecg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_subri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
@ -1071,6 +1098,55 @@ DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG,
|
||||
i64, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_faddqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_faddqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_faddqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxnmqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminnmqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fmaxqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_fminqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fmaxqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ah_fminqv_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
|
||||
i64, i64, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
|
||||
@ -1582,6 +1658,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1602,9 +1686,15 @@ DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1squ_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1squ_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1640,6 +1730,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1660,9 +1758,15 @@ DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1squ_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ld1squ_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1858,6 +1962,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1870,6 +1982,11 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st1sq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1dq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1dq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1905,6 +2022,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
@ -1917,6 +2042,11 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve_st1sq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1sq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1dq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_4(sve_st1dq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
|
||||
@ -2025,6 +2155,10 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldqq_le_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldqq_be_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
@ -2134,6 +2268,10 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldqq_le_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_ldqq_be_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
@ -2419,6 +2557,10 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stqq_le_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stqq_be_zd, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
@ -2486,6 +2628,10 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stqq_le_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_6(sve_stqq_be_zd_mte, TCG_CALL_NO_WG,
|
||||
void, env, ptr, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, i32)
|
||||
@ -2922,3 +3068,69 @@ DEF_HELPER_FLAGS_4(sve2_sqshlu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_sqshlu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_sqshlu_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2_sqshlu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_addqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_addqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_addqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_addqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_smaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_smaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_smaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_smaxqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_sminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_sminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_sminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_sminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_umaxqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_umaxqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_umaxqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_umaxqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_uminqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(pext, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_orqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_orqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_orqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_orqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_eorqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_eorqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_eorqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_eorqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sve2p1_andqv_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_andqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_andqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sve2p1_andqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(pmov_pv_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(pmov_pv_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(pmov_pv_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(pmov_vp_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(pmov_vp_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(pmov_vp_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_ld1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1bb_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1hh_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1hh_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1ss_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1ss_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1dd_le_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
DEF_HELPER_FLAGS_5(sve2p1_st1dd_be_c, TCG_CALL_NO_WG, void, env, ptr, tl, i32, i32)
|
||||
|
@ -353,6 +353,14 @@ DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_2(neon_add_u8, i32, i32, i32)
|
||||
DEF_HELPER_2(neon_add_u16, i32, i32, i32)
|
||||
DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
|
||||
@ -608,23 +616,31 @@ DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usdot_4b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_idx_4b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_idx_4b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_idx_4h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_idx_4h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_sudot_idx_4b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG,
|
||||
DEF_HELPER_FLAGS_5(gvec_usdot_idx_4b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_sdot_idx_2h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_udot_idx_2h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
|
||||
@ -715,10 +731,12 @@ DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
@ -774,23 +792,26 @@ DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst,
|
||||
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmls_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmls_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfmla, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_ah_bfmls, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, fpst, i32)
|
||||
@ -822,6 +843,8 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmla_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
@ -829,6 +852,8 @@ DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmls_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
@ -836,6 +861,8 @@ DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_ah_bfmls_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
@ -1081,14 +1108,24 @@ DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfdot_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_6(sme2_bfvdot_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmmla, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmlsl, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_bfmlsl_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
DEF_HELPER_FLAGS_6(gvec_ah_bfmlsl_idx, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
@ -1151,3 +1188,27 @@ DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_1h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_4b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti2_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_1b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_1h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_1s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_2b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_2h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_2s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_4b, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_4h, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
DEF_HELPER_FLAGS_4(sme2_luti4_4s, TCG_CALL_NO_RWG, void, ptr, ptr, env, i32)
|
||||
|
@ -214,6 +214,31 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
|
||||
return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the exception level to which exceptions should be taken for ZT0.
|
||||
* C.f. the ARM pseudocode function CheckSMEZT0Enabled, after the ZA check.
|
||||
*/
|
||||
static int zt0_exception_el(CPUARMState *env, int el)
|
||||
{
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
if (el <= 1
|
||||
&& !el_is_in_host(env, el)
|
||||
&& !FIELD_EX64(env->vfp.smcr_el[1], SMCR, EZT0)) {
|
||||
return 1;
|
||||
}
|
||||
if (el <= 2
|
||||
&& arm_is_el2_enabled(env)
|
||||
&& !FIELD_EX64(env->vfp.smcr_el[2], SMCR, EZT0)) {
|
||||
return 2;
|
||||
}
|
||||
if (arm_feature(env, ARM_FEATURE_EL3)
|
||||
&& !FIELD_EX64(env->vfp.smcr_el[3], SMCR, EZT0)) {
|
||||
return 3;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
|
||||
ARMMMUIdx mmu_idx)
|
||||
{
|
||||
@ -269,7 +294,14 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
|
||||
DP_TBFLAG_A64(flags, PSTATE_SM, 1);
|
||||
DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
|
||||
}
|
||||
DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
|
||||
|
||||
if (FIELD_EX64(env->svcr, SVCR, ZA)) {
|
||||
DP_TBFLAG_A64(flags, PSTATE_ZA, 1);
|
||||
if (cpu_isar_feature(aa64_sme2, env_archcpu(env))) {
|
||||
int zt0_el = zt0_exception_el(env, el);
|
||||
DP_TBFLAG_A64(flags, ZT0EXC_EL, zt0_el);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sctlr = regime_sctlr(env, stage1);
|
||||
|
@ -632,8 +632,11 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
|
||||
}
|
||||
|
||||
/* Note that these stores can throw exceptions on MPU faults */
|
||||
cpu_stl_data_ra(env, sp, nextinst, GETPC());
|
||||
cpu_stl_data_ra(env, sp + 4, saved_psr, GETPC());
|
||||
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN,
|
||||
arm_to_core_mmu_idx(mmu_idx));
|
||||
cpu_stl_mmu(env, sp, nextinst, oi, GETPC());
|
||||
cpu_stl_mmu(env, sp + 4, saved_psr, oi, GETPC());
|
||||
|
||||
env->regs[13] = sp;
|
||||
env->regs[14] = 0xfeffffff;
|
||||
@ -1048,6 +1051,9 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
|
||||
bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
|
||||
bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK;
|
||||
uintptr_t ra = GETPC();
|
||||
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN,
|
||||
arm_to_core_mmu_idx(mmu_idx));
|
||||
|
||||
assert(env->v7m.secure);
|
||||
|
||||
@ -1073,7 +1079,7 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
|
||||
* Note that we do not use v7m_stack_write() here, because the
|
||||
* accesses should not set the FSR bits for stacking errors if they
|
||||
* fail. (In pseudocode terms, they are AccType_NORMAL, not AccType_STACK
|
||||
* or AccType_LAZYFP). Faults in cpu_stl_data_ra() will throw exceptions
|
||||
* or AccType_LAZYFP). Faults in cpu_stl_mmu() will throw exceptions
|
||||
* and longjmp out.
|
||||
*/
|
||||
if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) {
|
||||
@ -1089,12 +1095,12 @@ void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
|
||||
if (i >= 16) {
|
||||
faddr += 8; /* skip the slot for the FPSCR */
|
||||
}
|
||||
cpu_stl_data_ra(env, faddr, slo, ra);
|
||||
cpu_stl_data_ra(env, faddr + 4, shi, ra);
|
||||
cpu_stl_mmu(env, faddr, slo, oi, ra);
|
||||
cpu_stl_mmu(env, faddr + 4, shi, oi, ra);
|
||||
}
|
||||
cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra);
|
||||
cpu_stl_mmu(env, fptr + 0x40, vfp_get_fpscr(env), oi, ra);
|
||||
if (cpu_isar_feature(aa32_mve, cpu)) {
|
||||
cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra);
|
||||
cpu_stl_mmu(env, fptr + 0x44, env->v7m.vpr, oi, ra);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1121,6 +1127,9 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
|
||||
{
|
||||
ARMCPU *cpu = env_archcpu(env);
|
||||
uintptr_t ra = GETPC();
|
||||
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN,
|
||||
arm_to_core_mmu_idx(mmu_idx));
|
||||
|
||||
/* fptr is the value of Rn, the frame pointer we load the FP regs from */
|
||||
assert(env->v7m.secure);
|
||||
@ -1155,16 +1164,16 @@ void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
|
||||
faddr += 8; /* skip the slot for the FPSCR and VPR */
|
||||
}
|
||||
|
||||
slo = cpu_ldl_data_ra(env, faddr, ra);
|
||||
shi = cpu_ldl_data_ra(env, faddr + 4, ra);
|
||||
slo = cpu_ldl_mmu(env, faddr, oi, ra);
|
||||
shi = cpu_ldl_mmu(env, faddr + 4, oi, ra);
|
||||
|
||||
dn = (uint64_t) shi << 32 | slo;
|
||||
*aa32_vfp_dreg(env, i / 2) = dn;
|
||||
}
|
||||
fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra);
|
||||
fpscr = cpu_ldl_mmu(env, fptr + 0x40, oi, ra);
|
||||
vfp_set_fpscr(env, fpscr);
|
||||
if (cpu_isar_feature(aa32_mve, cpu)) {
|
||||
env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra);
|
||||
env->v7m.vpr = cpu_ldl_mmu(env, fptr + 0x44, oi, ra);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1937,7 +1946,7 @@ static bool do_v7m_function_return(ARMCPU *cpu)
|
||||
* do them as secure, so work out what MMU index that is.
|
||||
*/
|
||||
mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
|
||||
oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx));
|
||||
oi = make_memop_idx(MO_LEUL | MO_ALIGN, arm_to_core_mmu_idx(mmu_idx));
|
||||
newpc = cpu_ldl_mmu(env, frameptr, oi, 0);
|
||||
newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0);
|
||||
|
||||
|
@ -148,13 +148,15 @@ static void mve_advance_vpt(CPUARMState *env)
|
||||
}
|
||||
|
||||
/* For loads, predicated lanes are zeroed instead of keeping their old values */
|
||||
#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \
|
||||
#define DO_VLDR(OP, MFLAG, MSIZE, MTYPE, LDTYPE, ESIZE, TYPE) \
|
||||
void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
|
||||
{ \
|
||||
TYPE *d = vd; \
|
||||
uint16_t mask = mve_element_mask(env); \
|
||||
uint16_t eci_mask = mve_eci_mask(env); \
|
||||
unsigned b, e; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
|
||||
/* \
|
||||
* R_SXTM allows the dest reg to become UNKNOWN for abandoned \
|
||||
* beats so we don't care if we update part of the dest and \
|
||||
@ -163,46 +165,48 @@ static void mve_advance_vpt(CPUARMState *env)
|
||||
for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
|
||||
if (eci_mask & (1 << b)) { \
|
||||
d[H##ESIZE(e)] = (mask & (1 << b)) ? \
|
||||
cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
|
||||
(MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0;\
|
||||
} \
|
||||
addr += MSIZE; \
|
||||
} \
|
||||
mve_advance_vpt(env); \
|
||||
}
|
||||
|
||||
#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \
|
||||
#define DO_VSTR(OP, MFLAG, MSIZE, STTYPE, ESIZE, TYPE) \
|
||||
void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
|
||||
{ \
|
||||
TYPE *d = vd; \
|
||||
uint16_t mask = mve_element_mask(env); \
|
||||
unsigned b, e; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
|
||||
for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
|
||||
if (mask & (1 << b)) { \
|
||||
cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
|
||||
cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \
|
||||
} \
|
||||
addr += MSIZE; \
|
||||
} \
|
||||
mve_advance_vpt(env); \
|
||||
}
|
||||
|
||||
DO_VLDR(vldrb, 1, ldub, 1, uint8_t)
|
||||
DO_VLDR(vldrh, 2, lduw, 2, uint16_t)
|
||||
DO_VLDR(vldrw, 4, ldl, 4, uint32_t)
|
||||
DO_VLDR(vldrb, MO_UB, 1, uint8_t, ldb, 1, uint8_t)
|
||||
DO_VLDR(vldrh, MO_TEUW, 2, uint16_t, ldw, 2, uint16_t)
|
||||
DO_VLDR(vldrw, MO_TEUL, 4, uint32_t, ldl, 4, uint32_t)
|
||||
|
||||
DO_VSTR(vstrb, 1, stb, 1, uint8_t)
|
||||
DO_VSTR(vstrh, 2, stw, 2, uint16_t)
|
||||
DO_VSTR(vstrw, 4, stl, 4, uint32_t)
|
||||
DO_VSTR(vstrb, MO_UB, 1, stb, 1, uint8_t)
|
||||
DO_VSTR(vstrh, MO_TEUW, 2, stw, 2, uint16_t)
|
||||
DO_VSTR(vstrw, MO_TEUL, 4, stl, 4, uint32_t)
|
||||
|
||||
DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t)
|
||||
DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t)
|
||||
DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t)
|
||||
DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t)
|
||||
DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t)
|
||||
DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t)
|
||||
DO_VLDR(vldrb_sh, MO_SB, 1, int8_t, ldb, 2, int16_t)
|
||||
DO_VLDR(vldrb_sw, MO_SB, 1, int8_t, ldb, 4, int32_t)
|
||||
DO_VLDR(vldrb_uh, MO_UB, 1, uint8_t, ldb, 2, uint16_t)
|
||||
DO_VLDR(vldrb_uw, MO_UB, 1, uint8_t, ldb, 4, uint32_t)
|
||||
DO_VLDR(vldrh_sw, MO_TESW, 2, int16_t, ldw, 4, int32_t)
|
||||
DO_VLDR(vldrh_uw, MO_TEUW, 2, uint16_t, ldw, 4, uint32_t)
|
||||
|
||||
DO_VSTR(vstrb_h, 1, stb, 2, int16_t)
|
||||
DO_VSTR(vstrb_w, 1, stb, 4, int32_t)
|
||||
DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
DO_VSTR(vstrb_h, MO_UB, 1, stb, 2, int16_t)
|
||||
DO_VSTR(vstrb_w, MO_UB, 1, stb, 4, int32_t)
|
||||
DO_VSTR(vstrh_w, MO_TEUW, 2, stw, 4, int32_t)
|
||||
|
||||
#undef DO_VLDR
|
||||
#undef DO_VSTR
|
||||
@ -214,7 +218,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
* For loads, predicated lanes are zeroed instead of retaining
|
||||
* their previous values.
|
||||
*/
|
||||
#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \
|
||||
#define DO_VLDR_SG(OP, MFLAG, MTYPE, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB)\
|
||||
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
|
||||
uint32_t base) \
|
||||
{ \
|
||||
@ -224,13 +228,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
uint16_t eci_mask = mve_eci_mask(env); \
|
||||
unsigned e; \
|
||||
uint32_t addr; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
|
||||
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
|
||||
if (!(eci_mask & 1)) { \
|
||||
continue; \
|
||||
} \
|
||||
addr = ADDRFN(base, m[H##ESIZE(e)]); \
|
||||
d[H##ESIZE(e)] = (mask & 1) ? \
|
||||
cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
|
||||
(MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0; \
|
||||
if (WB) { \
|
||||
m[H##ESIZE(e)] = addr; \
|
||||
} \
|
||||
@ -239,7 +245,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
}
|
||||
|
||||
/* We know here TYPE is unsigned so always the same as the offset type */
|
||||
#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
|
||||
#define DO_VSTR_SG(OP, MFLAG, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
|
||||
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
|
||||
uint32_t base) \
|
||||
{ \
|
||||
@ -249,13 +255,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
uint16_t eci_mask = mve_eci_mask(env); \
|
||||
unsigned e; \
|
||||
uint32_t addr; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
|
||||
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
|
||||
if (!(eci_mask & 1)) { \
|
||||
continue; \
|
||||
} \
|
||||
addr = ADDRFN(base, m[H##ESIZE(e)]); \
|
||||
if (mask & 1) { \
|
||||
cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
|
||||
cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \
|
||||
} \
|
||||
if (WB) { \
|
||||
m[H##ESIZE(e)] = addr; \
|
||||
@ -282,13 +290,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
uint16_t eci_mask = mve_eci_mask(env); \
|
||||
unsigned e; \
|
||||
uint32_t addr; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
|
||||
if (!(eci_mask & 1)) { \
|
||||
continue; \
|
||||
} \
|
||||
addr = ADDRFN(base, m[H4(e & ~1)]); \
|
||||
addr += 4 * (e & 1); \
|
||||
d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \
|
||||
d[H4(e)] = (mask & 1) ? cpu_ldl_mmu(env, addr, oi, GETPC()) : 0; \
|
||||
if (WB && (e & 1)) { \
|
||||
m[H4(e & ~1)] = addr - 4; \
|
||||
} \
|
||||
@ -306,6 +316,8 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
uint16_t eci_mask = mve_eci_mask(env); \
|
||||
unsigned e; \
|
||||
uint32_t addr; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
|
||||
if (!(eci_mask & 1)) { \
|
||||
continue; \
|
||||
@ -313,7 +325,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
addr = ADDRFN(base, m[H4(e & ~1)]); \
|
||||
addr += 4 * (e & 1); \
|
||||
if (mask & 1) { \
|
||||
cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \
|
||||
cpu_stl_mmu(env, addr, d[H4(e)], oi, GETPC()); \
|
||||
} \
|
||||
if (WB && (e & 1)) { \
|
||||
m[H4(e & ~1)] = addr - 4; \
|
||||
@ -327,40 +339,44 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
||||
#define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2))
|
||||
#define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3))
|
||||
|
||||
DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_sh, MO_SB, int8_t, ldb, 2, int16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_sw, MO_SB, int8_t, ldb, 4, int32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_sw, MO_TESW, int16_t, ldw, 4, int32_t, uint32_t, ADDR_ADD, false)
|
||||
|
||||
DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_ub, MO_UB, uint8_t, ldb, 1, uint8_t, uint8_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_uh, MO_UB, uint8_t, ldb, 2, uint16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrb_sg_uw, MO_UB, uint8_t, ldb, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_uh, MO_TEUW, uint16_t, ldw, 2, uint16_t, uint16_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrh_sg_uw, MO_TEUW, uint16_t, ldw, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR_SG(vldrw_sg_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
|
||||
DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false)
|
||||
|
||||
DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false)
|
||||
DO_VLDR_SG(vldrh_sg_os_sw, MO_TESW, int16_t, ldw, 4,
|
||||
int32_t, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrh_sg_os_uh, MO_TEUW, uint16_t, ldw, 2,
|
||||
uint16_t, uint16_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrh_sg_os_uw, MO_TEUW, uint16_t, ldw, 4,
|
||||
uint32_t, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VLDR_SG(vldrw_sg_os_uw, MO_TEUL, uint32_t, ldl, 4,
|
||||
uint32_t, uint32_t, ADDR_ADD_OSW, false)
|
||||
DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false)
|
||||
|
||||
DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrb_sg_ub, MO_UB, stb, 1, uint8_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrb_sg_uh, MO_UB, stb, 2, uint16_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrb_sg_uw, MO_UB, stb, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrh_sg_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrh_sg_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR_SG(vstrw_sg_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, false)
|
||||
DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false)
|
||||
|
||||
DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false)
|
||||
DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false)
|
||||
DO_VSTR_SG(vstrh_sg_os_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD_OSH, false)
|
||||
DO_VSTR_SG(vstrh_sg_os_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD_OSH, false)
|
||||
DO_VSTR_SG(vstrw_sg_os_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD_OSW, false)
|
||||
DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false)
|
||||
|
||||
DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
|
||||
DO_VLDR_SG(vldrw_sg_wb_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
|
||||
DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true)
|
||||
DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true)
|
||||
DO_VSTR_SG(vstrw_sg_wb_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, true)
|
||||
DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
|
||||
|
||||
/*
|
||||
@ -387,13 +403,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
|
||||
uint16_t mask = mve_eci_mask(env); \
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat] * 4; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
for (e = 0; e < 4; e++, data >>= 8) { \
|
||||
uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
|
||||
qd[H1(off[beat])] = data; \
|
||||
@ -411,13 +429,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
|
||||
uint32_t addr, data; \
|
||||
int y; /* y counts 0 2 0 2 */ \
|
||||
uint16_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat] * 8 + (beat & 1) * 4; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \
|
||||
qd[H2(off[beat])] = data; \
|
||||
data >>= 16; \
|
||||
@ -436,13 +456,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
|
||||
uint32_t addr, data; \
|
||||
uint32_t *qd; \
|
||||
int y; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat] * 4; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
y = (beat + (O1 & 2)) & 3; \
|
||||
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
|
||||
qd[H4(off[beat] >> 2)] = data; \
|
||||
@ -473,13 +495,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
uint8_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat] * 2; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
for (e = 0; e < 4; e++, data >>= 8) { \
|
||||
qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
|
||||
qd[H1(off[beat] + (e >> 1))] = data; \
|
||||
@ -497,13 +521,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
|
||||
uint32_t addr, data; \
|
||||
int e; \
|
||||
uint16_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat] * 4; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
for (e = 0; e < 2; e++, data >>= 16) { \
|
||||
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
|
||||
qd[H2(off[beat])] = data; \
|
||||
@ -520,13 +546,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
uint32_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
continue; \
|
||||
} \
|
||||
addr = base + off[beat]; \
|
||||
data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
|
||||
data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
|
||||
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
|
||||
qd[H4(off[beat] >> 3)] = data; \
|
||||
} \
|
||||
@ -549,6 +577,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
uint16_t mask = mve_eci_mask(env); \
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -560,7 +590,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
|
||||
data = (data << 8) | qd[H1(off[beat])]; \
|
||||
} \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -574,6 +604,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
uint32_t addr, data; \
|
||||
int y; /* y counts 0 2 0 2 */ \
|
||||
uint16_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -584,7 +616,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
data = qd[H2(off[beat])]; \
|
||||
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \
|
||||
data |= qd[H2(off[beat])] << 16; \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -598,6 +630,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
uint32_t addr, data; \
|
||||
uint32_t *qd; \
|
||||
int y; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -607,7 +641,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
|
||||
y = (beat + (O1 & 2)) & 3; \
|
||||
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
|
||||
data = qd[H4(off[beat] >> 2)]; \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -635,6 +669,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
uint8_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -646,7 +682,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
|
||||
data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \
|
||||
} \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -660,6 +696,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
uint32_t addr, data; \
|
||||
int e; \
|
||||
uint16_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -671,7 +709,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
|
||||
data = (data << 16) | qd[H2(off[beat])]; \
|
||||
} \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -684,6 +722,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
|
||||
uint32_t addr, data; \
|
||||
uint32_t *qd; \
|
||||
int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
|
||||
MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
|
||||
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
|
||||
if ((mask & 1) == 0) { \
|
||||
/* ECI says skip this beat */ \
|
||||
@ -692,7 +732,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
|
||||
addr = base + off[beat]; \
|
||||
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
|
||||
data = qd[H4(off[beat] >> 3)]; \
|
||||
cpu_stl_le_data_ra(env, addr, data, GETPC()); \
|
||||
cpu_stl_mmu(env, addr, data, oi, GETPC()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -2164,27 +2204,6 @@ DO_VSHLL_ALL(vshllt, true)
|
||||
DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \
|
||||
DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN)
|
||||
|
||||
static inline uint64_t do_urshr(uint64_t x, unsigned sh)
|
||||
{
|
||||
if (likely(sh < 64)) {
|
||||
return (x >> sh) + ((x >> (sh - 1)) & 1);
|
||||
} else if (sh == 64) {
|
||||
return x >> 63;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int64_t do_srshr(int64_t x, unsigned sh)
|
||||
{
|
||||
if (likely(sh < 64)) {
|
||||
return (x >> sh) + ((x >> (sh - 1)) & 1);
|
||||
} else {
|
||||
/* Rounding the sign bit always produces 0. */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
DO_VSHRN_ALL(vshrn, DO_SHR)
|
||||
DO_VSHRN_ALL(vrshrn, do_urshr)
|
||||
|
||||
|
@ -228,16 +228,31 @@ NEON_VOP(rshl_s16, neon_s16, 2)
|
||||
NEON_GVEC_VOP2(gvec_srshl_h, int16_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_sqrshl_bhs(src1, src2, 16, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_srshl_h, int16_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, NULL))
|
||||
NEON_GVEC_VOP2(gvec_srshl_s, int32_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_sqrshl_bhs(src1, src2, 32, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_srshl_s, int32_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_sqrshl_d(src1, (int8_t)src2, true, NULL))
|
||||
NEON_GVEC_VOP2(gvec_srshl_d, int64_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_sqrshl_d(src1, src2, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_srshl_d, int64_t)
|
||||
#undef NEON_FN
|
||||
|
||||
uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift)
|
||||
{
|
||||
return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
|
||||
@ -260,16 +275,31 @@ NEON_VOP(rshl_u16, neon_u16, 2)
|
||||
NEON_GVEC_VOP2(gvec_urshl_h, uint16_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_uqrshl_bhs(src1, (int16_t)src2, 16, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_urshl_h, uint16_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, NULL))
|
||||
NEON_GVEC_VOP2(gvec_urshl_s, int32_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_uqrshl_bhs(src1, src2, 32, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_urshl_s, int32_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_uqrshl_d(src1, (int8_t)src2, true, NULL))
|
||||
NEON_GVEC_VOP2(gvec_urshl_d, int64_t)
|
||||
#undef NEON_FN
|
||||
|
||||
#define NEON_FN(dest, src1, src2) \
|
||||
(dest = do_uqrshl_d(src1, src2, true, NULL))
|
||||
NEON_GVEC_VOP2(sme2_urshl_d, int64_t)
|
||||
#undef NEON_FN
|
||||
|
||||
uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift)
|
||||
{
|
||||
return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
|
||||
|
@ -22,30 +22,139 @@
|
||||
### SME Misc
|
||||
|
||||
ZERO 11000000 00 001 00000000000 imm:8
|
||||
ZERO_zt0 11000000 01 001 00000000000 00000001
|
||||
|
||||
### SME Move into/from Array
|
||||
|
||||
%mova_rs 13:2 !function=plus_12
|
||||
&mova esz rs pg zr za_imm v:bool to_vec:bool
|
||||
%mova_rv 13:2 !function=plus_8
|
||||
&mova_a rv zr off
|
||||
&mova_p esz rs pg zr za off v:bool
|
||||
&mova_t esz rs zr za off v:bool
|
||||
|
||||
MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
|
||||
&mova to_vec=0 rs=%mova_rs
|
||||
MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
|
||||
&mova to_vec=0 rs=%mova_rs esz=4
|
||||
MOVA_tz 11000000 00 00000 0 v:1 .. pg:3 zr:5 0 off:4 \
|
||||
&mova_p rs=%mova_rs esz=0 za=0
|
||||
MOVA_tz 11000000 01 00000 0 v:1 .. pg:3 zr:5 0 za:1 off:3 \
|
||||
&mova_p rs=%mova_rs esz=1
|
||||
MOVA_tz 11000000 10 00000 0 v:1 .. pg:3 zr:5 0 za:2 off:2 \
|
||||
&mova_p rs=%mova_rs esz=2
|
||||
MOVA_tz 11000000 11 00000 0 v:1 .. pg:3 zr:5 0 za:3 off:1 \
|
||||
&mova_p rs=%mova_rs esz=3
|
||||
MOVA_tz 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za:4 \
|
||||
&mova_p rs=%mova_rs esz=4 off=0
|
||||
|
||||
MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
|
||||
&mova to_vec=1 rs=%mova_rs
|
||||
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
|
||||
&mova to_vec=1 rs=%mova_rs esz=4
|
||||
MOVA_zt 11000000 00 00001 0 v:1 .. pg:3 0 off:4 zr:5 \
|
||||
&mova_p rs=%mova_rs esz=0 za=0
|
||||
MOVA_zt 11000000 01 00001 0 v:1 .. pg:3 0 za:1 off:3 zr:5 \
|
||||
&mova_p rs=%mova_rs esz=1
|
||||
MOVA_zt 11000000 10 00001 0 v:1 .. pg:3 0 za:2 off:2 zr:5 \
|
||||
&mova_p rs=%mova_rs esz=2
|
||||
MOVA_zt 11000000 11 00001 0 v:1 .. pg:3 0 za:3 off:1 zr:5 \
|
||||
&mova_p rs=%mova_rs esz=3
|
||||
MOVA_zt 11000000 11 00001 1 v:1 .. pg:3 0 za:4 zr:5 \
|
||||
&mova_p rs=%mova_rs esz=4 off=0
|
||||
|
||||
MOVA_tz2 11000000 00 00010 0 v:1 .. 000 zr:4 0 00 off:3 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVA_tz2 11000000 01 00010 0 v:1 .. 000 zr:4 0 00 za:1 off:2 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVA_tz2 11000000 10 00010 0 v:1 .. 000 zr:4 0 00 za:2 off:1 \
|
||||
&mova_t rs=%mova_rs esz=2
|
||||
MOVA_tz2 11000000 11 00010 0 v:1 .. 000 zr:4 0 00 za:3 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
MOVA_zt2 11000000 00 00011 0 v:1 .. 000 00 off:3 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVA_zt2 11000000 01 00011 0 v:1 .. 000 00 za:1 off:2 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVA_zt2 11000000 10 00011 0 v:1 .. 000 00 za:2 off:1 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=2
|
||||
MOVA_zt2 11000000 11 00011 0 v:1 .. 000 00 za:3 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
MOVA_tz4 11000000 00 00010 0 v:1 .. 001 zr:3 00 000 off:2 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVA_tz4 11000000 01 00010 0 v:1 .. 001 zr:3 00 000 za:1 off:1 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVA_tz4 11000000 10 00010 0 v:1 .. 001 zr:3 00 000 za:2 \
|
||||
&mova_t rs=%mova_rs esz=2 off=0
|
||||
MOVA_tz4 11000000 11 00010 0 v:1 .. 001 zr:3 00 00 za:3 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
MOVA_zt4 11000000 00 00011 0 v:1 .. 001 000 off:2 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVA_zt4 11000000 01 00011 0 v:1 .. 001 000 za:1 off:1 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVA_zt4 11000000 10 00011 0 v:1 .. 001 000 za:2 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=2 off=0
|
||||
MOVA_zt4 11000000 11 00011 0 v:1 .. 001 00 za:3 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
MOVA_az2 11000000 00 00010 00 .. 010 zr:4 000 off:3 \
|
||||
&mova_a rv=%mova_rv
|
||||
MOVA_az4 11000000 00 00010 00 .. 011 zr:3 0000 off:3 \
|
||||
&mova_a rv=%mova_rv
|
||||
|
||||
MOVA_za2 11000000 00 00011 00 .. 010 00 off:3 zr:4 0 \
|
||||
&mova_a rv=%mova_rv
|
||||
MOVA_za4 11000000 00 00011 00 .. 011 00 off:3 zr:3 00 \
|
||||
&mova_a rv=%mova_rv
|
||||
|
||||
### SME Move and Zero
|
||||
|
||||
MOVAZ_za2 11000000 00000110 0 .. 01010 off:3 zr:4 0 \
|
||||
&mova_a rv=%mova_rv
|
||||
MOVAZ_za4 11000000 00000110 0 .. 01110 off:3 zr:3 00 \
|
||||
&mova_a rv=%mova_rv
|
||||
|
||||
MOVAZ_zt 11000000 00 00001 0 v:1 .. 0001 off:4 zr:5 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVAZ_zt 11000000 01 00001 0 v:1 .. 0001 za:1 off:3 zr:5 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVAZ_zt 11000000 10 00001 0 v:1 .. 0001 za:2 off:2 zr:5 \
|
||||
&mova_t rs=%mova_rs esz=2
|
||||
MOVAZ_zt 11000000 11 00001 0 v:1 .. 0001 za:3 off:1 zr:5 \
|
||||
&mova_t rs=%mova_rs esz=3
|
||||
MOVAZ_zt 11000000 11 00001 1 v:1 .. 0001 za:4 zr:5 \
|
||||
&mova_t rs=%mova_rs esz=4 off=0
|
||||
|
||||
MOVAZ_zt2 11000000 00 00011 0 v:1 .. 00010 off:3 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVAZ_zt2 11000000 01 00011 0 v:1 .. 00010 za:1 off:2 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVAZ_zt2 11000000 10 00011 0 v:1 .. 00010 za:2 off:1 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=2
|
||||
MOVAZ_zt2 11000000 11 00011 0 v:1 .. 00010 za:3 zr:4 0 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
MOVAZ_zt4 11000000 00 00011 0 v:1 .. 001100 off:2 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=0 za=0
|
||||
MOVAZ_zt4 11000000 01 00011 0 v:1 .. 001100 za:1 off:1 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=1
|
||||
MOVAZ_zt4 11000000 10 00011 0 v:1 .. 001100 za:2 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=2 off=0
|
||||
MOVAZ_zt4 11000000 11 00011 0 v:1 .. 00110 za:3 zr:3 00 \
|
||||
&mova_t rs=%mova_rs esz=3 off=0
|
||||
|
||||
### SME Move into/from ZT0
|
||||
|
||||
MOVT_rzt 1100 0000 0100 1100 0 off:3 00 11111 rt:5
|
||||
MOVT_ztr 1100 0000 0100 1110 0 off:3 00 11111 rt:5
|
||||
|
||||
### SME Memory
|
||||
|
||||
&ldst esz rs pg rn rm za_imm v:bool st:bool
|
||||
&ldst esz rs pg rn rm za off v:bool st:bool
|
||||
|
||||
LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
|
||||
&ldst rs=%mova_rs
|
||||
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
|
||||
&ldst esz=4 rs=%mova_rs
|
||||
LDST1 1110000 0 00 st:1 rm:5 v:1 .. pg:3 rn:5 0 off:4 \
|
||||
&ldst rs=%mova_rs esz=0 za=0
|
||||
LDST1 1110000 0 01 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:1 off:3 \
|
||||
&ldst rs=%mova_rs esz=1
|
||||
LDST1 1110000 0 10 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:2 off:2 \
|
||||
&ldst rs=%mova_rs esz=2
|
||||
LDST1 1110000 0 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:3 off:1 \
|
||||
&ldst rs=%mova_rs esz=3
|
||||
LDST1 1110000 1 11 st:1 rm:5 v:1 .. pg:3 rn:5 0 za:4 \
|
||||
&ldst rs=%mova_rs esz=4 off=0
|
||||
|
||||
&ldstr rv rn imm
|
||||
@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
|
||||
@ -54,6 +163,12 @@ LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
|
||||
LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
|
||||
STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
|
||||
|
||||
&ldstzt0 rn
|
||||
@ldstzt0 ....... ... . ...... .. ... rn:5 ..... &ldstzt0
|
||||
|
||||
LDR_zt0 1110000 100 0 111111 00 000 ..... 00000 @ldstzt0
|
||||
STR_zt0 1110000 100 1 111111 00 000 ..... 00000 @ldstzt0
|
||||
|
||||
### SME Add Vector to Array
|
||||
|
||||
&adda zad zn pm pn
|
||||
@ -68,14 +183,18 @@ ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
|
||||
### SME Outer Product
|
||||
|
||||
&op zad zn zm pm pn sub:bool
|
||||
@op_16 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 ... zad:1 &op
|
||||
@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
|
||||
@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
|
||||
|
||||
FMOPA_h 10000001 100 ..... ... ... ..... . 100 . @op_16
|
||||
FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
|
||||
FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
|
||||
|
||||
BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
|
||||
FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
|
||||
BFMOPA 10000001 101 ..... ... ... ..... . 100 . @op_16
|
||||
|
||||
BFMOPA_w 10000001 100 ..... ... ... ..... . 00 .. @op_32
|
||||
FMOPA_w_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
|
||||
|
||||
SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
|
||||
SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
|
||||
@ -86,3 +205,789 @@ SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
|
||||
SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
|
||||
USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
|
||||
UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
|
||||
|
||||
BMOPA 1000000 0 10 0 ..... ... ... ..... . 10 .. @op_32
|
||||
SMOPA2_s 1010000 0 10 0 ..... ... ... ..... . 10 .. @op_32
|
||||
UMOPA2_s 1010000 1 10 0 ..... ... ... ..... . 10 .. @op_32
|
||||
|
||||
### SME2 Multi-vector Multiple and Single SVE Destructive
|
||||
|
||||
%zd_ax2 1:4 !function=times_2
|
||||
%zd_ax4 2:3 !function=times_4
|
||||
|
||||
&z2z_en zdn zm esz n
|
||||
@z2z_2x1 ....... . esz:2 .. zm:4 ....0. ..... .... . \
|
||||
&z2z_en n=2 zdn=%zd_ax2
|
||||
@z2z_4x1 ....... . esz:2 .. zm:4 ....1. ..... ...0 . \
|
||||
&z2z_en n=4 zdn=%zd_ax4
|
||||
|
||||
SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_2x1
|
||||
SMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 0 @z2z_4x1
|
||||
UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_2x1
|
||||
UMAX_n1 1100000 1 .. 10 .... 1010.0 00000 .... 1 @z2z_4x1
|
||||
SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_2x1
|
||||
SMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 0 @z2z_4x1
|
||||
UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_2x1
|
||||
UMIN_n1 1100000 1 .. 10 .... 1010.0 00001 .... 1 @z2z_4x1
|
||||
|
||||
FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_2x1
|
||||
FMAX_n1 1100000 1 .. 10 .... 1010.0 01000 .... 0 @z2z_4x1
|
||||
FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_2x1
|
||||
FMIN_n1 1100000 1 .. 10 .... 1010.0 01000 .... 1 @z2z_4x1
|
||||
FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_2x1
|
||||
FMAXNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 0 @z2z_4x1
|
||||
FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_2x1
|
||||
FMINNM_n1 1100000 1 .. 10 .... 1010.0 01001 .... 1 @z2z_4x1
|
||||
|
||||
SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_2x1
|
||||
SRSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 0 @z2z_4x1
|
||||
URSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 1 @z2z_2x1
|
||||
URSHL_n1 1100000 1 .. 10 .... 1010.0 10001 .... 1 @z2z_4x1
|
||||
|
||||
ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_2x1
|
||||
ADD_n1 1100000 1 .. 10 .... 1010.0 11000 .... 0 @z2z_4x1
|
||||
|
||||
SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_2x1
|
||||
SQDMULH_n1 1100000 1 .. 10 .... 1010.1 00000 .... 0 @z2z_4x1
|
||||
|
||||
### SME2 Multi-vector Multiple Vectors SVE Destructive
|
||||
|
||||
%zm_ax2 17:4 !function=times_2
|
||||
%zm_ax4 18:3 !function=times_4
|
||||
|
||||
@z2z_2x2 ....... . esz:2 . ....0 ....0. ..... .... . \
|
||||
&z2z_en n=2 zdn=%zd_ax2 zm=%zm_ax2
|
||||
@z2z_4x4 ....... . esz:2 . ...00 ....1. ..... ...0 . \
|
||||
&z2z_en n=4 zdn=%zd_ax4 zm=%zm_ax4
|
||||
|
||||
SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_2x2
|
||||
SMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 0 @z2z_4x4
|
||||
UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_2x2
|
||||
UMAX_nn 1100000 1 .. 1 ..... 1011.0 00000 .... 1 @z2z_4x4
|
||||
SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_2x2
|
||||
SMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 0 @z2z_4x4
|
||||
UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_2x2
|
||||
UMIN_nn 1100000 1 .. 1 ..... 1011.0 00001 .... 1 @z2z_4x4
|
||||
|
||||
FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_2x2
|
||||
FMAX_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 0 @z2z_4x4
|
||||
FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_2x2
|
||||
FMIN_nn 1100000 1 .. 1 ..... 1011.0 01000 .... 1 @z2z_4x4
|
||||
FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_2x2
|
||||
FMAXNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 0 @z2z_4x4
|
||||
FMINNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 1 @z2z_2x2
|
||||
FMINNM_nn 1100000 1 .. 1 ..... 1011.0 01001 .... 1 @z2z_4x4
|
||||
|
||||
SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_2x2
|
||||
SRSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 0 @z2z_4x4
|
||||
URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_2x2
|
||||
URSHL_nn 1100000 1 .. 1 ..... 1011.0 10001 .... 1 @z2z_4x4
|
||||
|
||||
SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_2x2
|
||||
SQDMULH_nn 1100000 1 .. 1 ..... 1011.1 00000 .... 0 @z2z_4x4
|
||||
|
||||
### SME2 Multi-vector Multiple and Single Array Vectors
|
||||
|
||||
&azz_n n off rv zn zm
|
||||
@azz_nx1_o3 ........ .... zm:4 ...... zn:5 .. off:3 &azz_n rv=%mova_rv
|
||||
|
||||
ADD_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2
|
||||
ADD_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4
|
||||
ADD_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=2
|
||||
ADD_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 10 ... @azz_nx1_o3 n=4
|
||||
|
||||
SUB_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2
|
||||
SUB_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4
|
||||
SUB_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2
|
||||
SUB_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4
|
||||
|
||||
%off3_x2 0:3 !function=times_2
|
||||
%off2_x2 0:2 !function=times_2
|
||||
|
||||
@azz_nx1_o3x2 ........ ... . zm:4 . .. ... zn:5 .. ... \
|
||||
&azz_n off=%off3_x2 rv=%mova_rv
|
||||
@azz_nx1_o2x2 ........ ... . zm:4 . .. ... zn:5 ... .. \
|
||||
&azz_n off=%off2_x2 rv=%mova_rv
|
||||
|
||||
FMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1
|
||||
FMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2
|
||||
FMLAL_n1 11000001 001 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
FMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1
|
||||
FMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2
|
||||
FMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
BFMLAL_n1 11000001 001 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1
|
||||
BFMLAL_n1 11000001 001 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2
|
||||
BFMLAL_n1 11000001 001 1 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
BFMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1
|
||||
BFMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2
|
||||
BFMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
FDOT_n1 11000001 001 0 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=2
|
||||
FDOT_n1 11000001 001 1 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=4
|
||||
|
||||
BFDOT_n1 11000001 001 0 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=2
|
||||
BFDOT_n1 11000001 001 1 .... 0 .. 100 ..... 10 ... @azz_nx1_o3 n=4
|
||||
|
||||
USDOT_n1 11000001 001 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2
|
||||
USDOT_n1 11000001 001 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4
|
||||
|
||||
SUDOT_n1 11000001 001 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2
|
||||
SUDOT_n1 11000001 001 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4
|
||||
|
||||
SDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2
|
||||
SDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4
|
||||
SDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=2
|
||||
SDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 00 ... @azz_nx1_o3 n=4
|
||||
SDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=2
|
||||
SDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 01 ... @azz_nx1_o3 n=4
|
||||
|
||||
UDOT_n1_4b 11000001 001 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2
|
||||
UDOT_n1_4b 11000001 001 1 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=4
|
||||
UDOT_n1_4h 11000001 011 0 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=2
|
||||
UDOT_n1_4h 11000001 011 1 .... 0 .. 101 ..... 10 ... @azz_nx1_o3 n=4
|
||||
UDOT_n1_2h 11000001 011 0 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=2
|
||||
UDOT_n1_2h 11000001 011 1 .... 0 .. 101 ..... 11 ... @azz_nx1_o3 n=4
|
||||
|
||||
SMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 00 ... @azz_nx1_o3x2 n=1
|
||||
SMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=2
|
||||
SMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 000 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
SMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 01 ... @azz_nx1_o3x2 n=1
|
||||
SMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=2
|
||||
SMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 010 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
UMLAL_n1 11000001 011 0 .... 0 .. 011 ..... 10 ... @azz_nx1_o3x2 n=1
|
||||
UMLAL_n1 11000001 011 0 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=2
|
||||
UMLAL_n1 11000001 011 1 .... 0 .. 010 ..... 100 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
UMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1
|
||||
UMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2
|
||||
UMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4
|
||||
|
||||
%off2_x4 0:2 !function=times_4
|
||||
%off1_x4 0:1 !function=times_4
|
||||
|
||||
@azz_nx1_o2x4 ........ ... . zm:4 . .. ... zn:5 ... .. \
|
||||
&azz_n off=%off2_x4 rv=%mova_rv
|
||||
@azz_nx1_o1x4 ........ ... . zm:4 . .. ... zn:5 .... . \
|
||||
&azz_n off=%off1_x4 rv=%mova_rv
|
||||
|
||||
SMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1
|
||||
SMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1
|
||||
SMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2
|
||||
SMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2
|
||||
SMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4
|
||||
SMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4
|
||||
|
||||
SMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1
|
||||
SMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1
|
||||
SMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2
|
||||
SMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2
|
||||
SMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4
|
||||
SMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4
|
||||
|
||||
UMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1
|
||||
UMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1
|
||||
UMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2
|
||||
UMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2
|
||||
UMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4
|
||||
UMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4
|
||||
|
||||
UMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1
|
||||
UMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1
|
||||
UMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2
|
||||
UMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2
|
||||
UMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4
|
||||
UMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4
|
||||
|
||||
USMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 001 .. @azz_nx1_o2x4 n=1
|
||||
USMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=2
|
||||
USMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4
|
||||
|
||||
SUMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2
|
||||
SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
|
||||
|
||||
BFMLA_n1 11000001 011 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2
|
||||
FMLA_n1_h 11000001 001 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2
|
||||
FMLA_n1_s 11000001 001 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
|
||||
FMLA_n1_d 11000001 011 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
|
||||
|
||||
BFMLA_n1 11000001 011 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4
|
||||
FMLA_n1_h 11000001 001 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4
|
||||
FMLA_n1_s 11000001 001 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
|
||||
FMLA_n1_d 11000001 011 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
|
||||
|
||||
BFMLS_n1 11000001 011 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2
|
||||
FMLS_n1_h 11000001 001 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2
|
||||
FMLS_n1_s 11000001 001 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
|
||||
FMLS_n1_d 11000001 011 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
|
||||
|
||||
BFMLS_n1 11000001 011 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4
|
||||
FMLS_n1_h 11000001 001 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4
|
||||
FMLS_n1_s 11000001 001 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
|
||||
FMLS_n1_d 11000001 011 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
|
||||
|
||||
### SME2 Multi-vector Multiple Array Vectors
|
||||
|
||||
%zn_ax2 6:4 !function=times_2
|
||||
%zn_ax4 7:3 !function=times_4
|
||||
|
||||
@azz_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \
|
||||
&azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2
|
||||
@azz_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \
|
||||
&azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4
|
||||
|
||||
ADD_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3
|
||||
ADD_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3
|
||||
ADD_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3
|
||||
ADD_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3
|
||||
|
||||
SUB_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3
|
||||
SUB_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3
|
||||
SUB_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3
|
||||
SUB_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3
|
||||
|
||||
@azz_2x2_o2x2 ........ ... ..... . .. ... ..... ... .. \
|
||||
&azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off2_x2
|
||||
@azz_4x4_o2x2 ........ ... ..... . .. ... ..... ... .. \
|
||||
&azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off2_x2
|
||||
|
||||
FMLAL_nn 11000001 101 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2
|
||||
FMLAL_nn 11000001 101 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2
|
||||
|
||||
FMLSL_nn 11000001 101 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2
|
||||
FMLSL_nn 11000001 101 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2
|
||||
|
||||
BFMLAL_nn 11000001 101 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2
|
||||
BFMLAL_nn 11000001 101 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2
|
||||
|
||||
BFMLSL_nn 11000001 101 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2
|
||||
BFMLSL_nn 11000001 101 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2
|
||||
|
||||
FDOT_nn 11000001 101 ....0 0 .. 100 ....0 00 ... @azz_2x2_o3
|
||||
FDOT_nn 11000001 101 ...01 0 .. 100 ...00 00 ... @azz_4x4_o3
|
||||
|
||||
BFDOT_nn 11000001 101 ....0 0 .. 100 ....0 10 ... @azz_2x2_o3
|
||||
BFDOT_nn 11000001 101 ...01 0 .. 100 ...00 10 ... @azz_4x4_o3
|
||||
|
||||
USDOT_nn 11000001 101 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3
|
||||
USDOT_nn 11000001 101 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3
|
||||
|
||||
SDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3
|
||||
SDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3
|
||||
SDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 00 ... @azz_2x2_o3
|
||||
SDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 00 ... @azz_4x4_o3
|
||||
SDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 01 ... @azz_2x2_o3
|
||||
SDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 01 ... @azz_4x4_o3
|
||||
|
||||
UDOT_nn_4b 11000001 101 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3
|
||||
UDOT_nn_4b 11000001 101 ...01 0 .. 101 ...00 10 ... @azz_4x4_o3
|
||||
UDOT_nn_4h 11000001 111 ....0 0 .. 101 ....0 10 ... @azz_2x2_o3
|
||||
UDOT_nn_4h 11000001 111 ...01 0 .. 101 ...00 10 ... @azz_4x4_o3
|
||||
UDOT_nn_2h 11000001 111 ....0 0 .. 101 ....0 11 ... @azz_2x2_o3
|
||||
UDOT_nn_2h 11000001 111 ...01 0 .. 101 ...00 11 ... @azz_4x4_o3
|
||||
|
||||
SMLAL_nn 11000001 111 ....0 0 .. 010 ....0 000 .. @azz_2x2_o2x2
|
||||
SMLAL_nn 11000001 111 ...01 0 .. 010 ...00 000 .. @azz_4x4_o2x2
|
||||
|
||||
SMLSL_nn 11000001 111 ....0 0 .. 010 ....0 010 .. @azz_2x2_o2x2
|
||||
SMLSL_nn 11000001 111 ...01 0 .. 010 ...00 010 .. @azz_4x4_o2x2
|
||||
|
||||
UMLAL_nn 11000001 111 ....0 0 .. 010 ....0 100 .. @azz_2x2_o2x2
|
||||
UMLAL_nn 11000001 111 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2
|
||||
|
||||
UMLSL_nn 11000001 111 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2
|
||||
UMLSL_nn 11000001 111 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2
|
||||
|
||||
@azz_2x2_o1x4 ........ ... ..... . .. ... ..... ... .. \
|
||||
&azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off1_x4
|
||||
@azz_4x4_o1x4 ........ ... ..... . .. ... ..... ... .. \
|
||||
&azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off1_x4
|
||||
|
||||
SMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4
|
||||
SMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4
|
||||
SMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4
|
||||
SMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4
|
||||
|
||||
SMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4
|
||||
SMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4
|
||||
SMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4
|
||||
SMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4
|
||||
|
||||
UMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4
|
||||
UMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4
|
||||
UMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4
|
||||
UMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4
|
||||
|
||||
UMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4
|
||||
UMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4
|
||||
UMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4
|
||||
UMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4
|
||||
|
||||
USMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0010 . @azz_2x2_o1x4
|
||||
USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0010 . @azz_4x4_o1x4
|
||||
|
||||
BFMLA_nn 11000001 111 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3
|
||||
FMLA_nn_h 11000001 101 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3
|
||||
FMLA_nn_s 11000001 101 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
|
||||
FMLA_nn_d 11000001 111 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
|
||||
|
||||
BFMLA_nn 11000001 111 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3
|
||||
FMLA_nn_h 11000001 101 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3
|
||||
FMLA_nn_s 11000001 101 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
|
||||
FMLA_nn_d 11000001 111 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
|
||||
|
||||
BFMLS_nn 11000001 111 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3
|
||||
FMLS_nn_h 11000001 101 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3
|
||||
FMLS_nn_s 11000001 101 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
|
||||
FMLS_nn_d 11000001 111 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
|
||||
|
||||
BFMLS_nn 11000001 111 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
|
||||
FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
|
||||
FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
|
||||
FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
|
||||
|
||||
&az_n n off rv zm
|
||||
@az_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \
|
||||
&az_n n=2 rv=%mova_rv zm=%zn_ax2
|
||||
@az_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \
|
||||
&az_n n=4 rv=%mova_rv zm=%zn_ax4
|
||||
|
||||
FADD_nn_h 11000001 101 00100 0 .. 111 ....0 00 ... @az_2x2_o3
|
||||
FADD_nn_s 11000001 101 00000 0 .. 111 ....0 00 ... @az_2x2_o3
|
||||
FADD_nn_d 11000001 111 00000 0 .. 111 ....0 00 ... @az_2x2_o3
|
||||
FADD_nn_h 11000001 101 00101 0 .. 111 ...00 00 ... @az_4x4_o3
|
||||
FADD_nn_s 11000001 101 00001 0 .. 111 ...00 00 ... @az_4x4_o3
|
||||
FADD_nn_d 11000001 111 00001 0 .. 111 ...00 00 ... @az_4x4_o3
|
||||
|
||||
FSUB_nn_h 11000001 101 00100 0 .. 111 ....0 01 ... @az_2x2_o3
|
||||
FSUB_nn_s 11000001 101 00000 0 .. 111 ....0 01 ... @az_2x2_o3
|
||||
FSUB_nn_d 11000001 111 00000 0 .. 111 ....0 01 ... @az_2x2_o3
|
||||
FSUB_nn_h 11000001 101 00101 0 .. 111 ...00 01 ... @az_4x4_o3
|
||||
FSUB_nn_s 11000001 101 00001 0 .. 111 ...00 01 ... @az_4x4_o3
|
||||
FSUB_nn_d 11000001 111 00001 0 .. 111 ...00 01 ... @az_4x4_o3
|
||||
|
||||
BFADD_nn 11000001 111 00100 0 .. 111 ....0 00 ... @az_2x2_o3
|
||||
BFADD_nn 11000001 111 00101 0 .. 111 ...00 00 ... @az_4x4_o3
|
||||
BFSUB_nn 11000001 111 00100 0 .. 111 ....0 01 ... @az_2x2_o3
|
||||
BFSUB_nn 11000001 111 00101 0 .. 111 ...00 01 ... @az_4x4_o3
|
||||
|
||||
### SME2 Multi-vector Indexed
|
||||
|
||||
&azx_n n off rv zn zm idx
|
||||
|
||||
%idx3_15_10 15:1 10:2
|
||||
%idx2_10_2 10:2 2:1
|
||||
|
||||
@azx_1x1_o3x2 ........ .... zm:4 . .. . .. zn:5 .. ... \
|
||||
&azx_n n=1 rv=%mova_rv off=%off3_x2 idx=%idx3_15_10
|
||||
@azx_2x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \
|
||||
&azx_n n=2 rv=%mova_rv off=%off2_x2 zn=%zn_ax2 idx=%idx2_10_2
|
||||
@azx_4x1_o2x2 ........ .... zm:4 . .. . .. ..... .. ... \
|
||||
&azx_n n=4 rv=%mova_rv off=%off2_x2 zn=%zn_ax4 idx=%idx2_10_2
|
||||
|
||||
FMLAL_nx 11000001 1000 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2
|
||||
FMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2
|
||||
FMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2
|
||||
|
||||
FMLSL_nx 11000001 1000 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2
|
||||
FMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2
|
||||
FMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2
|
||||
|
||||
BFMLAL_nx 11000001 1000 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2
|
||||
BFMLAL_nx 11000001 1001 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2
|
||||
BFMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2
|
||||
|
||||
BFMLSL_nx 11000001 1000 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2
|
||||
BFMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2
|
||||
BFMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2
|
||||
|
||||
@azx_2x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \
|
||||
&azx_n n=2 rv=%mova_rv zn=%zn_ax2
|
||||
@azx_4x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \
|
||||
&azx_n n=4 rv=%mova_rv zn=%zn_ax4
|
||||
@azx_2x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \
|
||||
&azx_n n=2 rv=%mova_rv zn=%zn_ax2
|
||||
@azx_4x1_i1_o3 ........ .... zm:4 . .. .. idx:1 .... ... off:3 \
|
||||
&azx_n n=4 rv=%mova_rv zn=%zn_ax4
|
||||
|
||||
FDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_i2_o3
|
||||
FDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_i2_o3
|
||||
|
||||
BFDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_i2_o3
|
||||
BFDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_i2_o3
|
||||
|
||||
FVDOT 11000001 0101 .... 0 .. 0 .. ....0 01 ... @azx_2x1_i2_o3
|
||||
BFVDOT 11000001 0101 .... 0 .. 0 .. ....0 11 ... @azx_2x1_i2_o3
|
||||
|
||||
SDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_i2_o3
|
||||
SDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_i2_o3
|
||||
SDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 00 ... @azx_2x1_i2_o3
|
||||
SDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 00 ... @azx_4x1_i2_o3
|
||||
SDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 01 ... @azx_2x1_i1_o3
|
||||
SDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 01 ... @azx_4x1_i1_o3
|
||||
|
||||
UDOT_nx_2h 11000001 0101 .... 0 .. 1 .. ....0 10 ... @azx_2x1_i2_o3
|
||||
UDOT_nx_2h 11000001 0101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_i2_o3
|
||||
UDOT_nx_4b 11000001 0101 .... 0 .. 1 .. ....1 10 ... @azx_2x1_i2_o3
|
||||
UDOT_nx_4b 11000001 0101 .... 1 .. 1 .. ...01 10 ... @azx_4x1_i2_o3
|
||||
UDOT_nx_4h 11000001 1101 .... 0 .. 00 . ....0 11 ... @azx_2x1_i1_o3
|
||||
UDOT_nx_4h 11000001 1101 .... 1 .. 00 . ...00 11 ... @azx_4x1_i1_o3
|
||||
|
||||
USDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 01 ... @azx_2x1_i2_o3
|
||||
USDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 01 ... @azx_4x1_i2_o3
|
||||
|
||||
SUDOT_nx 11000001 0101 .... 0 .. 1 .. ....1 11 ... @azx_2x1_i2_o3
|
||||
SUDOT_nx 11000001 0101 .... 1 .. 1 .. ...01 11 ... @azx_4x1_i2_o3
|
||||
|
||||
SVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 00 ... @azx_2x1_i2_o3
|
||||
SVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 00 ... @azx_4x1_i2_o3
|
||||
SVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 01 ... @azx_4x1_i1_o3
|
||||
|
||||
UVDOT_nx_2h 11000001 0101 .... 0 .. 0 .. ....1 10 ... @azx_2x1_i2_o3
|
||||
UVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 10 ... @azx_4x1_i2_o3
|
||||
UVDOT_nx_4h 11000001 1101 .... 1 .. 01 . ...00 11 ... @azx_4x1_i1_o3
|
||||
|
||||
SUVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 11 ... @azx_4x1_i2_o3
|
||||
USVDOT_nx_4b 11000001 0101 .... 1 .. 0 .. ...01 01 ... @azx_4x1_i2_o3
|
||||
|
||||
SMLAL_nx 11000001 1100 .... . .. 1 .. ..... 00 ... @azx_1x1_o3x2
|
||||
SMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 00 ... @azx_2x1_o2x2
|
||||
SMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 00 ... @azx_4x1_o2x2
|
||||
|
||||
SMLSL_nx 11000001 1100 .... . .. 1 .. ..... 01 ... @azx_1x1_o3x2
|
||||
SMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_o2x2
|
||||
SMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_o2x2
|
||||
|
||||
UMLAL_nx 11000001 1100 .... . .. 1 .. ..... 10 ... @azx_1x1_o3x2
|
||||
UMLAL_nx 11000001 1101 .... 0 .. 1 .. ....0 10 ... @azx_2x1_o2x2
|
||||
UMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2
|
||||
|
||||
UMLSL_nx 11000001 1100 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2
|
||||
UMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2
|
||||
UMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2
|
||||
|
||||
%idx4_15_10 15:1 10:3
|
||||
%idx4_10_1 10:2 1:2
|
||||
%idx3_10_1 10:1 1:2
|
||||
|
||||
@azx_1x1_i4_o2 ........ .... zm:4 . .. ... zn:5 ... .. \
|
||||
&azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx4_15_10
|
||||
@azx_1x1_i3_o2 ........ .... zm:4 . .. ... zn:5 ... .. \
|
||||
&azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx3_15_10
|
||||
@azx_2x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \
|
||||
&azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx4_10_1
|
||||
@azx_2x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \
|
||||
&azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx3_10_1
|
||||
@azx_4x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \
|
||||
&azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx4_10_1
|
||||
@azx_4x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \
|
||||
&azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx3_10_1
|
||||
|
||||
SMLALL_nx_s 11000001 0000 .... . .. ... ..... 000 .. @azx_1x1_i4_o2
|
||||
SMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 000 .. @azx_1x1_i3_o2
|
||||
SMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 00 ... @azx_2x1_i4_o1
|
||||
SMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 00 ... @azx_2x1_i3_o1
|
||||
SMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 00 ... @azx_4x1_i4_o1
|
||||
SMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 00 ... @azx_4x1_i3_o1
|
||||
|
||||
SMLSLL_nx_s 11000001 0000 .... . .. ... ..... 010 .. @azx_1x1_i4_o2
|
||||
SMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 010 .. @azx_1x1_i3_o2
|
||||
SMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 01 ... @azx_2x1_i4_o1
|
||||
SMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 01 ... @azx_2x1_i3_o1
|
||||
SMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 01 ... @azx_4x1_i4_o1
|
||||
SMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 01 ... @azx_4x1_i3_o1
|
||||
|
||||
UMLALL_nx_s 11000001 0000 .... . .. ... ..... 100 .. @azx_1x1_i4_o2
|
||||
UMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 100 .. @azx_1x1_i3_o2
|
||||
UMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 10 ... @azx_2x1_i4_o1
|
||||
UMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 10 ... @azx_2x1_i3_o1
|
||||
UMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 10 ... @azx_4x1_i4_o1
|
||||
UMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 10 ... @azx_4x1_i3_o1
|
||||
|
||||
UMLSLL_nx_s 11000001 0000 .... . .. ... ..... 110 .. @azx_1x1_i4_o2
|
||||
UMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 110 .. @azx_1x1_i3_o2
|
||||
UMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 11 ... @azx_2x1_i4_o1
|
||||
UMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 11 ... @azx_2x1_i3_o1
|
||||
UMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 11 ... @azx_4x1_i4_o1
|
||||
UMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 11 ... @azx_4x1_i3_o1
|
||||
|
||||
USMLALL_nx_s 11000001 0000 .... . .. ... ..... 001 .. @azx_1x1_i4_o2
|
||||
USMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 00 ... @azx_2x1_i4_o1
|
||||
USMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 00 ... @azx_4x1_i4_o1
|
||||
|
||||
SUMLALL_nx_s 11000001 0000 .... . .. ... ..... 101 .. @azx_1x1_i4_o2
|
||||
SUMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 10 ... @azx_2x1_i4_o1
|
||||
SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1
|
||||
|
||||
%idx3_10_3 10:2 3:1
|
||||
@azx_2x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \
|
||||
&azx_n n=2 rv=%mova_rv zn=%zn_ax2 idx=%idx3_10_3
|
||||
@azx_4x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \
|
||||
&azx_n n=4 rv=%mova_rv zn=%zn_ax4 idx=%idx3_10_3
|
||||
|
||||
BFMLA_nx 11000001 0001 .... 0 .. 1.. ....1 0 .... @azx_2x1_i3_o3
|
||||
FMLA_nx_h 11000001 0001 .... 0 .. 1.. ....0 0 .... @azx_2x1_i3_o3
|
||||
FMLA_nx_s 11000001 0101 .... 0 .. 0.. ....0 00 ... @azx_2x1_i2_o3
|
||||
FMLA_nx_d 11000001 1101 .... 0 .. 00. ....0 00 ... @azx_2x1_i1_o3
|
||||
|
||||
BFMLA_nx 11000001 0001 .... 1 .. 1.. ...01 0 .... @azx_4x1_i3_o3
|
||||
FMLA_nx_h 11000001 0001 .... 1 .. 1.. ...00 0 .... @azx_4x1_i3_o3
|
||||
FMLA_nx_s 11000001 0101 .... 1 .. 0.. ...00 00 ... @azx_4x1_i2_o3
|
||||
FMLA_nx_d 11000001 1101 .... 1 .. 00. ...00 00 ... @azx_4x1_i1_o3
|
||||
|
||||
BFMLS_nx 11000001 0001 .... 0 .. 1.. ....1 1 .... @azx_2x1_i3_o3
|
||||
FMLS_nx_h 11000001 0001 .... 0 .. 1.. ....0 1 .... @azx_2x1_i3_o3
|
||||
FMLS_nx_s 11000001 0101 .... 0 .. 0.. ....0 10 ... @azx_2x1_i2_o3
|
||||
FMLS_nx_d 11000001 1101 .... 0 .. 00. ....0 10 ... @azx_2x1_i1_o3
|
||||
|
||||
BFMLS_nx 11000001 0001 .... 1 .. 1.. ...01 1 .... @azx_4x1_i3_o3
|
||||
FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3
|
||||
FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3
|
||||
FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
|
||||
|
||||
### SME2 Add / Sub array accumulators
|
||||
|
||||
ADD_aaz_s 11000001 101 000000 .. 111 ....0 10 ... @az_2x2_o3
|
||||
ADD_aaz_s 11000001 101 000010 .. 111 ...00 10 ... @az_4x4_o3
|
||||
ADD_aaz_d 11000001 111 000000 .. 111 ....0 10 ... @az_2x2_o3
|
||||
ADD_aaz_d 11000001 111 000010 .. 111 ...00 10 ... @az_4x4_o3
|
||||
|
||||
SUB_aaz_s 11000001 101 000000 .. 111 ....0 11 ... @az_2x2_o3
|
||||
SUB_aaz_s 11000001 101 000010 .. 111 ...00 11 ... @az_4x4_o3
|
||||
SUB_aaz_d 11000001 111 000000 .. 111 ....0 11 ... @az_2x2_o3
|
||||
SUB_aaz_d 11000001 111 000010 .. 111 ...00 11 ... @az_4x4_o3
|
||||
|
||||
### SME2 Multi-vector SVE Constructive Unary
|
||||
|
||||
&zz_e zd zn esz
|
||||
&zz_n zd zn n
|
||||
@zz_1x2 ........ ... ..... ...... ..... zd:5 \
|
||||
&zz_n n=1 zn=%zn_ax2
|
||||
@zz_1x4 ........ ... ..... ...... ..... zd:5 \
|
||||
&zz_n n=1 zn=%zn_ax4
|
||||
@zz_2x1 ........ ... ..... ...... zn:5 ..... \
|
||||
&zz_n n=1 zd=%zd_ax2
|
||||
@zz_2x2 ........ ... ..... ...... .... . ..... \
|
||||
&zz_n n=2 zd=%zd_ax2 zn=%zn_ax2
|
||||
@zz_4x4 ........ ... ..... ...... .... . ..... \
|
||||
&zz_n n=4 zd=%zd_ax4 zn=%zn_ax4
|
||||
@zz_4x2_n1 ........ ... ..... ...... .... . ..... \
|
||||
&zz_n n=1 zd=%zd_ax4 zn=%zn_ax2
|
||||
|
||||
BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2
|
||||
BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2
|
||||
|
||||
FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2
|
||||
FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2
|
||||
|
||||
FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1
|
||||
FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1
|
||||
|
||||
FCVTZS 11000001 001 00001 111000 ....0 ....0 @zz_2x2
|
||||
FCVTZS 11000001 001 10001 111000 ...00 ...00 @zz_4x4
|
||||
FCVTZU 11000001 001 00001 111000 ....1 ....0 @zz_2x2
|
||||
FCVTZU 11000001 001 10001 111000 ...01 ...00 @zz_4x4
|
||||
|
||||
SCVTF 11000001 001 00010 111000 ....0 ....0 @zz_2x2
|
||||
SCVTF 11000001 001 10010 111000 ...00 ...00 @zz_4x4
|
||||
UCVTF 11000001 001 00010 111000 ....1 ....0 @zz_2x2
|
||||
UCVTF 11000001 001 10010 111000 ...01 ...00 @zz_4x4
|
||||
|
||||
FRINTN 11000001 101 01000 111000 ....0 ....0 @zz_2x2
|
||||
FRINTN 11000001 101 11000 111000 ...00 ...00 @zz_4x4
|
||||
FRINTP 11000001 101 01001 111000 ....0 ....0 @zz_2x2
|
||||
FRINTP 11000001 101 11001 111000 ...00 ...00 @zz_4x4
|
||||
FRINTM 11000001 101 01010 111000 ....0 ....0 @zz_2x2
|
||||
FRINTM 11000001 101 11010 111000 ...00 ...00 @zz_4x4
|
||||
FRINTA 11000001 101 01100 111000 ....0 ....0 @zz_2x2
|
||||
FRINTA 11000001 101 11100 111000 ...00 ...00 @zz_4x4
|
||||
|
||||
SQCVT_sh 11000001 001 00011 111000 ....0 ..... @zz_1x2
|
||||
UQCVT_sh 11000001 001 00011 111000 ....1 ..... @zz_1x2
|
||||
SQCVTU_sh 11000001 011 00011 111000 ....0 ..... @zz_1x2
|
||||
|
||||
SQCVT_sb 11000001 001 10011 111000 ...00 ..... @zz_1x4
|
||||
UQCVT_sb 11000001 001 10011 111000 ...01 ..... @zz_1x4
|
||||
SQCVTU_sb 11000001 011 10011 111000 ...00 ..... @zz_1x4
|
||||
|
||||
SQCVT_dh 11000001 101 10011 111000 ...00 ..... @zz_1x4
|
||||
UQCVT_dh 11000001 101 10011 111000 ...01 ..... @zz_1x4
|
||||
SQCVTU_dh 11000001 111 10011 111000 ...00 ..... @zz_1x4
|
||||
|
||||
SQCVTN_sb 11000001 001 10011 111000 ...10 ..... @zz_1x4
|
||||
UQCVTN_sb 11000001 001 10011 111000 ...11 ..... @zz_1x4
|
||||
SQCVTUN_sb 11000001 011 10011 111000 ...10 ..... @zz_1x4
|
||||
|
||||
SQCVTN_dh 11000001 101 10011 111000 ...10 ..... @zz_1x4
|
||||
UQCVTN_dh 11000001 101 10011 111000 ...11 ..... @zz_1x4
|
||||
SQCVTUN_dh 11000001 111 10011 111000 ...10 ..... @zz_1x4
|
||||
|
||||
SUNPK_2bh 11000001 011 00101 111000 ..... ....0 @zz_2x1
|
||||
SUNPK_2hs 11000001 101 00101 111000 ..... ....0 @zz_2x1
|
||||
SUNPK_2sd 11000001 111 00101 111000 ..... ....0 @zz_2x1
|
||||
|
||||
UUNPK_2bh 11000001 011 00101 111000 ..... ....1 @zz_2x1
|
||||
UUNPK_2hs 11000001 101 00101 111000 ..... ....1 @zz_2x1
|
||||
UUNPK_2sd 11000001 111 00101 111000 ..... ....1 @zz_2x1
|
||||
|
||||
SUNPK_4bh 11000001 011 10101 111000 ....0 ...00 @zz_4x2_n1
|
||||
SUNPK_4hs 11000001 101 10101 111000 ....0 ...00 @zz_4x2_n1
|
||||
SUNPK_4sd 11000001 111 10101 111000 ....0 ...00 @zz_4x2_n1
|
||||
|
||||
UUNPK_4bh 11000001 011 10101 111000 ....0 ...01 @zz_4x2_n1
|
||||
UUNPK_4hs 11000001 101 10101 111000 ....0 ...01 @zz_4x2_n1
|
||||
UUNPK_4sd 11000001 111 10101 111000 ....0 ...01 @zz_4x2_n1
|
||||
|
||||
ZIP_4 11000001 esz:2 1 10110 111000 ...00 ... 00 \
|
||||
&zz_e zd=%zd_ax4 zn=%zn_ax4
|
||||
ZIP_4 11000001 001 10111 111000 ...00 ... 00 \
|
||||
&zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4
|
||||
|
||||
UZP_4 11000001 esz:2 1 10110 111000 ...00 ... 10 \
|
||||
&zz_e zd=%zd_ax4 zn=%zn_ax4
|
||||
UZP_4 11000001 001 10111 111000 ...00 ... 10 \
|
||||
&zz_e esz=4 zd=%zd_ax4 zn=%zn_ax4
|
||||
|
||||
### SME2 Multi-vector SVE Constructive Binary
|
||||
|
||||
&rshr zd zn shift
|
||||
|
||||
%rshr_sh_shift 16:4 !function=rsub_16
|
||||
%rshr_sb_shift 16:5 !function=rsub_32
|
||||
%rshr_dh_shift 22:1 16:5 !function=rsub_64
|
||||
|
||||
@rshr_sh ........ .... .... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax2 shift=%rshr_sh_shift
|
||||
@rshr_sb ........ ... ..... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax4 shift=%rshr_sb_shift
|
||||
@rshr_dh ........ ... ..... ...... ..... zd:5 \
|
||||
&rshr zn=%zn_ax4 shift=%rshr_dh_shift
|
||||
|
||||
SQRSHR_sh 11000001 1110 .... 110101 ....0 ..... @rshr_sh
|
||||
UQRSHR_sh 11000001 1110 .... 110101 ....1 ..... @rshr_sh
|
||||
SQRSHRU_sh 11000001 1111 .... 110101 ....0 ..... @rshr_sh
|
||||
|
||||
SQRSHR_sb 11000001 011 ..... 110110 ...00 ..... @rshr_sb
|
||||
SQRSHR_dh 11000001 1.1 ..... 110110 ...00 ..... @rshr_dh
|
||||
UQRSHR_sb 11000001 011 ..... 110110 ...01 ..... @rshr_sb
|
||||
UQRSHR_dh 11000001 1.1 ..... 110110 ...01 ..... @rshr_dh
|
||||
SQRSHRU_sb 11000001 011 ..... 110110 ...10 ..... @rshr_sb
|
||||
SQRSHRU_dh 11000001 1.1 ..... 110110 ...10 ..... @rshr_dh
|
||||
|
||||
SQRSHRN_sh 01000101 1011 .... 001010 ....0 ..... @rshr_sh
|
||||
UQRSHRN_sh 01000101 1011 .... 001110 ....0 ..... @rshr_sh
|
||||
SQRSHRUN_sh 01000101 1011 .... 000010 ....0 ..... @rshr_sh
|
||||
|
||||
SQRSHRN_sb 11000001 011 ..... 110111 ...00 ..... @rshr_sb
|
||||
SQRSHRN_dh 11000001 1.1 ..... 110111 ...00 ..... @rshr_dh
|
||||
UQRSHRN_sb 11000001 011 ..... 110111 ...01 ..... @rshr_sb
|
||||
UQRSHRN_dh 11000001 1.1 ..... 110111 ...01 ..... @rshr_dh
|
||||
SQRSHRUN_sb 11000001 011 ..... 110111 ...10 ..... @rshr_sb
|
||||
SQRSHRUN_dh 11000001 1.1 ..... 110111 ...10 ..... @rshr_dh
|
||||
|
||||
&zzz_e zd zn zm esz
|
||||
|
||||
ZIP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 0 \
|
||||
&zzz_e zd=%zd_ax2
|
||||
ZIP_2 11000001 00 1 zm:5 110101 zn:5 .... 0 \
|
||||
&zzz_e zd=%zd_ax2 esz=4
|
||||
|
||||
UZP_2 11000001 esz:2 1 zm:5 110100 zn:5 .... 1 \
|
||||
&zzz_e zd=%zd_ax2
|
||||
UZP_2 11000001 00 1 zm:5 110101 zn:5 .... 1 \
|
||||
&zzz_e zd=%zd_ax2 esz=4
|
||||
|
||||
&zzz_en zd zn zm esz n
|
||||
|
||||
FCLAMP 11000001 esz:2 1 zm:5 110000 zn:5 .... 0 \
|
||||
&zzz_en zd=%zd_ax2 n=2
|
||||
FCLAMP 11000001 esz:2 1 zm:5 110010 zn:5 ...0 0 \
|
||||
&zzz_en zd=%zd_ax4 n=4
|
||||
|
||||
SCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 0 \
|
||||
&zzz_en zd=%zd_ax2 n=2
|
||||
SCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 0 \
|
||||
&zzz_en zd=%zd_ax4 n=4
|
||||
|
||||
UCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 1 \
|
||||
&zzz_en zd=%zd_ax2 n=2
|
||||
UCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 1 \
|
||||
&zzz_en zd=%zd_ax4 n=4
|
||||
|
||||
### SME2 Multi-vector SVE Select
|
||||
|
||||
%sel_pg 10:3 !function=plus_8
|
||||
|
||||
SEL 11000001 esz:2 1 ....0 100 ... ....0 ....0 \
|
||||
n=2 zd=%zd_ax2 zn=%zn_ax2 zm=%zm_ax2 pg=%sel_pg
|
||||
SEL 11000001 esz:2 1 ...01 100 ... ...00 ...00 \
|
||||
n=4 zd=%zd_ax4 zn=%zn_ax4 zm=%zm_ax4 pg=%sel_pg
|
||||
|
||||
### SME Multiple Zero
|
||||
|
||||
&zero_za rv off ngrp nvec
|
||||
|
||||
ZERO_za 11000000 000011 000 .. 0000000000 off:3 \
|
||||
&zero_za ngrp=2 nvec=1 rv=%mova_rv
|
||||
ZERO_za 11000000 000011 100 .. 0000000000 off:3 \
|
||||
&zero_za ngrp=4 nvec=1 rv=%mova_rv
|
||||
|
||||
ZERO_za 11000000 000011 001 .. 0000000000 ... \
|
||||
&zero_za ngrp=1 nvec=2 rv=%mova_rv off=%off3_x2
|
||||
ZERO_za 11000000 000011 010 .. 0000000000 0.. \
|
||||
&zero_za ngrp=2 nvec=2 rv=%mova_rv off=%off2_x2
|
||||
ZERO_za 11000000 000011 011 .. 0000000000 0.. \
|
||||
&zero_za ngrp=4 nvec=2 rv=%mova_rv off=%off2_x2
|
||||
|
||||
ZERO_za 11000000 000011 101 .. 0000000000 0.. \
|
||||
&zero_za ngrp=1 nvec=4 rv=%mova_rv off=%off2_x4
|
||||
ZERO_za 11000000 000011 110 .. 0000000000 00. \
|
||||
&zero_za ngrp=2 nvec=4 rv=%mova_rv off=%off1_x4
|
||||
ZERO_za 11000000 000011 111 .. 0000000000 00. \
|
||||
&zero_za ngrp=4 nvec=4 rv=%mova_rv off=%off1_x4
|
||||
|
||||
### SME Lookup Table Read
|
||||
|
||||
&lut zd zn idx
|
||||
|
||||
# LUTI2, consecutive
|
||||
LUTI2_c_1b 1100 0000 1100 11 idx:4 00 00 zn:5 zd:5 &lut
|
||||
LUTI2_c_1h 1100 0000 1100 11 idx:4 01 00 zn:5 zd:5 &lut
|
||||
LUTI2_c_1s 1100 0000 1100 11 idx:4 10 00 zn:5 zd:5 &lut
|
||||
|
||||
LUTI2_c_2b 1100 0000 1000 11 idx:3 1 00 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
LUTI2_c_2h 1100 0000 1000 11 idx:3 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
LUTI2_c_2s 1100 0000 1000 11 idx:3 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
|
||||
LUTI2_c_4b 1100 0000 1000 11 idx:2 10 00 00 zn:5 ... 00 &lut zd=%zd_ax4
|
||||
LUTI2_c_4h 1100 0000 1000 11 idx:2 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4
|
||||
LUTI2_c_4s 1100 0000 1000 11 idx:2 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4
|
||||
|
||||
# LUTI2, strided (must check zd alignment)
|
||||
LUTI2_s_2b 1100 0000 1001 11 idx:3 1 00 00 zn:5 zd:5 &lut
|
||||
LUTI2_s_2h 1100 0000 1001 11 idx:3 1 01 00 zn:5 zd:5 &lut
|
||||
|
||||
LUTI2_s_4b 1100 0000 1001 11 idx:2 10 00 00 zn:5 zd:5 &lut
|
||||
LUTI2_s_4h 1100 0000 1001 11 idx:2 10 01 00 zn:5 zd:5 &lut
|
||||
|
||||
# LUTI4, consecutive
|
||||
LUTI4_c_1b 1100 0000 1100 101 idx:3 00 00 zn:5 zd:5 &lut
|
||||
LUTI4_c_1h 1100 0000 1100 101 idx:3 01 00 zn:5 zd:5 &lut
|
||||
LUTI4_c_1s 1100 0000 1100 101 idx:3 10 00 zn:5 zd:5 &lut
|
||||
|
||||
LUTI4_c_2b 1100 0000 1000 101 idx:2 1 00 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
LUTI4_c_2h 1100 0000 1000 101 idx:2 1 01 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
LUTI4_c_2s 1100 0000 1000 101 idx:2 1 10 00 zn:5 .... 0 &lut zd=%zd_ax2
|
||||
|
||||
LUTI4_c_4h 1100 0000 1000 101 idx:1 10 01 00 zn:5 ... 00 &lut zd=%zd_ax4
|
||||
LUTI4_c_4s 1100 0000 1000 101 idx:1 10 10 00 zn:5 ... 00 &lut zd=%zd_ax4
|
||||
|
||||
# LUTI4, strided (must check zd alignment)
|
||||
LUTI4_s_2b 1100 0000 1001 101 idx:2 1 00 00 zn:5 zd:5 &lut
|
||||
LUTI4_s_2h 1100 0000 1001 101 idx:2 1 01 00 zn:5 zd:5 &lut
|
||||
|
||||
LUTI4_s_4h 1100 0000 1001 101 idx:1 10 01 00 zn:5 zd:5 &lut
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -30,6 +30,7 @@
|
||||
%size_23 23:2
|
||||
%dtype_23_13 23:2 13:2
|
||||
%index3_22_19 22:1 19:2
|
||||
%index3_22_17 22:1 17:2
|
||||
%index3_19_11 19:2 11:1
|
||||
%index2_20_11 20:1 11:1
|
||||
|
||||
@ -57,6 +58,11 @@
|
||||
# as propagated via the MOVPRFX instruction.
|
||||
%reg_movprfx 0:5
|
||||
|
||||
%rn_ax2 6:4 !function=times_2
|
||||
|
||||
%pnd 0:3 !function=plus_8
|
||||
%pnn 5:3 !function=plus_8
|
||||
|
||||
###########################################################################
|
||||
# Named attribute sets. These are used to make nice(er) names
|
||||
# when creating helpers common to those for the individual
|
||||
@ -102,6 +108,7 @@
|
||||
# Two operand
|
||||
@pd_pn ........ esz:2 .. .... ....... rn:4 . rd:4 &rr_esz
|
||||
@rd_rn ........ esz:2 ...... ...... rn:5 rd:5 &rr_esz
|
||||
@rd_rnx2 ........ ... ..... ...... ..... rd:5 &rr_esz rn=%rn_ax2
|
||||
|
||||
# Two operand with governing predicate, flags setting
|
||||
@pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s
|
||||
@ -131,11 +138,11 @@
|
||||
@rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
|
||||
&rrrr_esz ra=%reg_movprfx
|
||||
|
||||
# Four operand with unused vector element size
|
||||
@rda_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 \
|
||||
&rrrr_esz esz=0 ra=%reg_movprfx
|
||||
@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \
|
||||
&rrrr_esz esz=0 rn=%reg_movprfx
|
||||
# Four operand with explicit vector element size
|
||||
@rda_rn_rm_ex ........ ... rm:5 ... ... rn:5 rd:5 \
|
||||
&rrrr_esz ra=%reg_movprfx
|
||||
@rdn_ra_rm_ex ........ ... rm:5 ... ... ra:5 rd:5 \
|
||||
&rrrr_esz rn=%reg_movprfx
|
||||
|
||||
# Three operand with "memory" size, aka immediate left shift
|
||||
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
|
||||
@ -222,6 +229,9 @@
|
||||
@rprr_load_dt ....... dtype:4 rm:5 ... pg:3 rn:5 rd:5 &rprr_load
|
||||
@rpri_load_dt ....... dtype:4 . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load
|
||||
|
||||
@rprr_load ....... .... rm:5 ... pg:3 rn:5 rd:5 &rprr_load
|
||||
@rpri_load ....... .... . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load
|
||||
|
||||
@rprr_load_msz ....... .... rm:5 ... pg:3 rn:5 rd:5 \
|
||||
&rprr_load dtype=%msz_dtype
|
||||
@rpri_load_msz ....... .... . imm:s4 ... pg:3 rn:5 rd:5 \
|
||||
@ -245,7 +255,7 @@
|
||||
|
||||
# Stores; user must fill in ESZ, MSZ, NREG as needed.
|
||||
@rprr_store ....... .. .. rm:5 ... pg:3 rn:5 rd:5 &rprr_store
|
||||
@rpri_store_msz ....... msz:2 .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store
|
||||
@rpri_store ....... .. .. . imm:s4 ... pg:3 rn:5 rd:5 &rpri_store
|
||||
@rprr_store_esz_n0 ....... .. esz:2 rm:5 ... pg:3 rn:5 rd:5 \
|
||||
&rprr_store nreg=0
|
||||
@rprr_scatter_store ....... msz:2 .. rm:5 ... pg:3 rn:5 rd:5 \
|
||||
@ -320,6 +330,11 @@ ORV 00000100 .. 011 000 001 ... ..... ..... @rd_pg_rn
|
||||
EORV 00000100 .. 011 001 001 ... ..... ..... @rd_pg_rn
|
||||
ANDV 00000100 .. 011 010 001 ... ..... ..... @rd_pg_rn
|
||||
|
||||
# SVE2.1 bitwise logical reduction (quadwords)
|
||||
ORQV 00000100 .. 011 100 001 ... ..... ..... @rd_pg_rn
|
||||
EORQV 00000100 .. 011 101 001 ... ..... ..... @rd_pg_rn
|
||||
ANDQV 00000100 .. 011 110 001 ... ..... ..... @rd_pg_rn
|
||||
|
||||
# SVE constructive prefix (predicated)
|
||||
MOVPRFX_z 00000100 .. 010 000 001 ... ..... ..... @rd_pg_rn
|
||||
MOVPRFX_m 00000100 .. 010 001 001 ... ..... ..... @rd_pg_rn
|
||||
@ -335,6 +350,13 @@ UMAXV 00000100 .. 001 001 001 ... ..... ..... @rd_pg_rn
|
||||
SMINV 00000100 .. 001 010 001 ... ..... ..... @rd_pg_rn
|
||||
UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
|
||||
|
||||
# SVE2.1 segment reduction
|
||||
ADDQV 00000100 .. 000 101 001 ... ..... ..... @rd_pg_rn
|
||||
SMAXQV 00000100 .. 001 100 001 ... ..... ..... @rd_pg_rn
|
||||
SMINQV 00000100 .. 001 110 001 ... ..... ..... @rd_pg_rn
|
||||
UMAXQV 00000100 .. 001 101 001 ... ..... ..... @rd_pg_rn
|
||||
UMINQV 00000100 .. 001 111 001 ... ..... ..... @rd_pg_rn
|
||||
|
||||
### SVE Shift by Immediate - Predicated Group
|
||||
|
||||
# SVE bitwise shift by immediate (predicated)
|
||||
@ -428,12 +450,12 @@ XAR 00000100 .. 1 ..... 001 101 rm:5 rd:5 &rrri_esz \
|
||||
rn=%reg_movprfx esz=%tszimm16_esz imm=%tszimm16_shr
|
||||
|
||||
# SVE2 bitwise ternary operations
|
||||
EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
|
||||
BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
|
||||
BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
|
||||
BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
|
||||
BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
|
||||
NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
|
||||
EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_ex esz=0
|
||||
|
||||
### SVE Index Generation Group
|
||||
|
||||
@ -559,6 +581,14 @@ DUP_s 00000101 .. 1 00000 001110 ..... ..... @rd_rn
|
||||
DUP_x 00000101 .. 1 ..... 001000 rn:5 rd:5 \
|
||||
&rri imm=%imm7_22_16
|
||||
|
||||
# SVE Permute Vector - one source quadwords
|
||||
DUPQ 00000101 001 imm:4 1 001001 rn:5 rd:5 &rri_esz esz=0
|
||||
DUPQ 00000101 001 imm:3 10 001001 rn:5 rd:5 &rri_esz esz=1
|
||||
DUPQ 00000101 001 imm:2 100 001001 rn:5 rd:5 &rri_esz esz=2
|
||||
DUPQ 00000101 001 imm:1 1000 001001 rn:5 rd:5 &rri_esz esz=3
|
||||
|
||||
EXTQ 00000101 0110 imm:4 001001 rn:5 rd:5 &rri
|
||||
|
||||
# SVE insert SIMD&FP scalar register
|
||||
INSR_f 00000101 .. 1 10100 001110 ..... ..... @rdn_rm
|
||||
|
||||
@ -568,6 +598,22 @@ INSR_r 00000101 .. 1 00100 001110 ..... ..... @rdn_rm
|
||||
# SVE reverse vector elements
|
||||
REV_v 00000101 .. 1 11000 001110 ..... ..... @rd_rn
|
||||
|
||||
# SVE move predicate to/from vector
|
||||
|
||||
PMOV_pv 00000101 00 101 01 0001110 rn:5 0 rd:4 \
|
||||
&rri_esz esz=0 imm=0
|
||||
PMOV_pv 00000101 00 101 1 imm:1 0001110 rn:5 0 rd:4 &rri_esz esz=1
|
||||
PMOV_pv 00000101 01 101 imm:2 0001110 rn:5 0 rd:4 &rri_esz esz=2
|
||||
PMOV_pv 00000101 1. 101 .. 0001110 rn:5 0 rd:4 \
|
||||
&rri_esz esz=3 imm=%index3_22_17
|
||||
|
||||
PMOV_vp 00000101 00 101 01 1001110 0 rn:4 rd:5 \
|
||||
&rri_esz esz=0 imm=0
|
||||
PMOV_vp 00000101 00 101 1 imm:1 1001110 0 rn:4 rd:5 &rri_esz esz=1
|
||||
PMOV_vp 00000101 01 101 imm:2 1001110 0 rn:4 rd:5 &rri_esz esz=2
|
||||
PMOV_vp 00000101 1. 101 .. 1001110 0 rn:4 rd:5 \
|
||||
&rri_esz esz=3 imm=%index3_22_17
|
||||
|
||||
# SVE vector table lookup
|
||||
TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm
|
||||
|
||||
@ -614,6 +660,15 @@ UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0
|
||||
TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0
|
||||
TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0
|
||||
|
||||
# SVE2.1 permute vector elements (quadwords)
|
||||
ZIPQ1 01000100 .. 0 ..... 111 000 ..... ..... @rd_rn_rm
|
||||
ZIPQ2 01000100 .. 0 ..... 111 001 ..... ..... @rd_rn_rm
|
||||
UZPQ1 01000100 .. 0 ..... 111 010 ..... ..... @rd_rn_rm
|
||||
UZPQ2 01000100 .. 0 ..... 111 011 ..... ..... @rd_rn_rm
|
||||
|
||||
TBLQ 01000100 .. 0 ..... 111 110 ..... ..... @rd_rn_rm
|
||||
TBXQ 00000101 .. 1 ..... 001 101 ..... ..... @rd_rn_rm
|
||||
|
||||
### SVE Permute - Predicated Group
|
||||
|
||||
# SVE compress active elements
|
||||
@ -725,6 +780,7 @@ PTEST 00100101 01 010000 11 pg:4 0 rn:4 0 0000
|
||||
|
||||
# SVE predicate initialize
|
||||
PTRUE 00100101 esz:2 01100 s:1 111000 pat:5 0 rd:4
|
||||
PTRUE_cnt 00100101 esz:2 1000000111100000010 ... rd=%pnd
|
||||
|
||||
# SVE initialize FFR
|
||||
SETFFR 00100101 0010 1100 1001 0000 0000 0000
|
||||
@ -765,7 +821,8 @@ BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
|
||||
### SVE Predicate Count Group
|
||||
|
||||
# SVE predicate count
|
||||
CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
|
||||
CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
|
||||
CNTP_c 00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5
|
||||
|
||||
# SVE inc/dec register by predicate count
|
||||
INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1
|
||||
@ -786,11 +843,35 @@ SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
|
||||
CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
|
||||
|
||||
# SVE integer compare scalar count and limit
|
||||
WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
|
||||
&while esz rd rn rm sf u eq
|
||||
WHILE_lt 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4 &while
|
||||
WHILE_gt 00100101 esz:2 1 rm:5 000 sf:1 u:1 0 rn:5 eq:1 rd:4 &while
|
||||
|
||||
# SVE2 pointer conflict compare
|
||||
WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
|
||||
|
||||
# SVE2.1 predicate pair
|
||||
%pd_pair 1:3 !function=times_2
|
||||
@while_pair ........ esz:2 . rm:5 .... u:1 . rn:5 . ... eq:1 \
|
||||
&while rd=%pd_pair sf=1
|
||||
|
||||
WHILE_lt_pair 00100101 .. 1 ..... 0101 . 1 ..... 1 ... . @while_pair
|
||||
WHILE_gt_pair 00100101 .. 1 ..... 0101 . 0 ..... 1 ... . @while_pair
|
||||
|
||||
# SVE2.1 predicate as count
|
||||
@while_cnt ........ esz:2 . rm:5 .... u:1 . rn:5 . eq:1 ... \
|
||||
&while rd=%pnd sf=1
|
||||
|
||||
WHILE_lt_cnt2 00100101 .. 1 ..... 0100 . 1 ..... 1 . ... @while_cnt
|
||||
WHILE_lt_cnt4 00100101 .. 1 ..... 0110 . 1 ..... 1 . ... @while_cnt
|
||||
WHILE_gt_cnt2 00100101 .. 1 ..... 0100 . 0 ..... 1 . ... @while_cnt
|
||||
WHILE_gt_cnt4 00100101 .. 1 ..... 0110 . 0 ..... 1 . ... @while_cnt
|
||||
|
||||
# SVE2.1 extract mask predicate from predicate-as-counter
|
||||
&pext rd rn esz imm
|
||||
PEXT_1 00100101 esz:2 1 00000 0111 00 imm:2 ... 1 rd:4 &pext rn=%pnn
|
||||
PEXT_2 00100101 esz:2 1 00000 0111 010 imm:1 ... 1 rd:4 &pext rn=%pnn
|
||||
|
||||
### SVE Integer Wide Immediate - Unpredicated Group
|
||||
|
||||
# SVE broadcast floating-point immediate (unpredicated)
|
||||
@ -851,10 +932,13 @@ CDOT_zzzz 01000100 esz:2 0 rm:5 0001 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
||||
#### SVE Multiply - Indexed
|
||||
|
||||
# SVE integer dot product (indexed)
|
||||
SDOT_zzxw_s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
|
||||
SDOT_zzxw_d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
|
||||
UDOT_zzxw_s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
|
||||
UDOT_zzxw_d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
|
||||
SDOT_zzxw_4s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
|
||||
SDOT_zzxw_4d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
|
||||
UDOT_zzxw_4s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
|
||||
UDOT_zzxw_4d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
|
||||
|
||||
SDOT_zzxw_2s 01000100 10 0 ..... 110010 ..... ..... @rrxr_2 esz=2
|
||||
UDOT_zzxw_2s 01000100 10 0 ..... 110011 ..... ..... @rrxr_2 esz=2
|
||||
|
||||
# SVE2 integer multiply-add (indexed)
|
||||
MLA_zzxz_h 01000100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=1
|
||||
@ -873,8 +957,8 @@ SQRDMLSH_zzxz_s 01000100 10 1 ..... 000101 ..... ..... @rrxr_2 esz=2
|
||||
SQRDMLSH_zzxz_d 01000100 11 1 ..... 000101 ..... ..... @rrxr_1 esz=3
|
||||
|
||||
# SVE mixed sign dot product (indexed)
|
||||
USDOT_zzxw_s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2
|
||||
SUDOT_zzxw_s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2
|
||||
USDOT_zzxw_4s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2
|
||||
SUDOT_zzxw_4s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2
|
||||
|
||||
# SVE2 saturating multiply-add (indexed)
|
||||
SQDMLALB_zzxw_s 01000100 10 1 ..... 0010.0 ..... ..... @rrxr_3a esz=2
|
||||
@ -990,6 +1074,14 @@ FMINNMV 01100101 .. 000 101 001 ... ..... ..... @rd_pg_rn
|
||||
FMAXV 01100101 .. 000 110 001 ... ..... ..... @rd_pg_rn
|
||||
FMINV 01100101 .. 000 111 001 ... ..... ..... @rd_pg_rn
|
||||
|
||||
### SVE FP recursive reduction (quadwords)
|
||||
|
||||
FADDQV 01100100 .. 010 000 101 ... ..... ..... @rd_pg_rn
|
||||
FMAXNMQV 01100100 .. 010 100 101 ... ..... ..... @rd_pg_rn
|
||||
FMINNMQV 01100100 .. 010 101 101 ... ..... ..... @rd_pg_rn
|
||||
FMAXQV 01100100 .. 010 110 101 ... ..... ..... @rd_pg_rn
|
||||
FMINQV 01100100 .. 010 111 101 ... ..... ..... @rd_pg_rn
|
||||
|
||||
## SVE Floating Point Unary Operations - Unpredicated Group
|
||||
|
||||
FRECPE 01100101 .. 001 110 001100 ..... ..... @rd_rn
|
||||
@ -1151,12 +1243,24 @@ LD1_zpiz 1000010 .. 01 ..... 1.. ... ..... ..... \
|
||||
|
||||
# SVE contiguous load (scalar plus scalar)
|
||||
LD_zprr 1010010 .... ..... 010 ... ..... ..... @rprr_load_dt nreg=0
|
||||
# LD1W (128-bit element)
|
||||
LD_zprr 1010010 1000 rm:5 100 pg:3 rn:5 rd:5 \
|
||||
&rprr_load dtype=16 nreg=0
|
||||
# LD1D (128-bit element)
|
||||
LD_zprr 1010010 1100 rm:5 100 pg:3 rn:5 rd:5 \
|
||||
&rprr_load dtype=17 nreg=0
|
||||
|
||||
# SVE contiguous first-fault load (scalar plus scalar)
|
||||
LDFF1_zprr 1010010 .... ..... 011 ... ..... ..... @rprr_load_dt nreg=0
|
||||
|
||||
# SVE contiguous load (scalar plus immediate)
|
||||
LD_zpri 1010010 .... 0.... 101 ... ..... ..... @rpri_load_dt nreg=0
|
||||
# LD1W (128-bit element)
|
||||
LD_zpri 1010010 1000 1 imm:s4 001 pg:3 rn:5 rd:5 \
|
||||
&rpri_load dtype=16 nreg=0
|
||||
# LD1D (128-bit element)
|
||||
LD_zpri 1010010 1100 1 imm:s4 001 pg:3 rn:5 rd:5 \
|
||||
&rpri_load dtype=17 nreg=0
|
||||
|
||||
# SVE contiguous non-fault load (scalar plus immediate)
|
||||
LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0
|
||||
@ -1166,12 +1270,26 @@ LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0
|
||||
# SVE load multiple structures (scalar plus scalar)
|
||||
# LD2B, LD2H, LD2W, LD2D; etc.
|
||||
LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz
|
||||
# LD[234]Q
|
||||
LD_zprr 1010010 01 01 ..... 100 ... ..... ..... \
|
||||
@rprr_load dtype=18 nreg=1
|
||||
LD_zprr 1010010 10 01 ..... 100 ... ..... ..... \
|
||||
@rprr_load dtype=18 nreg=2
|
||||
LD_zprr 1010010 11 01 ..... 100 ... ..... ..... \
|
||||
@rprr_load dtype=18 nreg=3
|
||||
|
||||
# SVE contiguous non-temporal load (scalar plus immediate)
|
||||
# LDNT1B, LDNT1H, LDNT1W, LDNT1D
|
||||
# SVE load multiple structures (scalar plus immediate)
|
||||
# LD2B, LD2H, LD2W, LD2D; etc.
|
||||
LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz
|
||||
# LD[234]Q
|
||||
LD_zpri 1010010 01 001 .... 111 ... ..... ..... \
|
||||
@rpri_load dtype=18 nreg=1
|
||||
LD_zpri 1010010 10 001 .... 111 ... ..... ..... \
|
||||
@rpri_load dtype=18 nreg=2
|
||||
LD_zpri 1010010 11 001 .... 111 ... ..... ..... \
|
||||
@rpri_load dtype=18 nreg=3
|
||||
|
||||
# SVE load and broadcast quadword (scalar plus scalar)
|
||||
LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \
|
||||
@ -1222,6 +1340,10 @@ LD1_zprz 1100010 10 1. ..... 1.. ... ..... ..... \
|
||||
LD1_zprz 1100010 11 1. ..... 11. ... ..... ..... \
|
||||
@rprr_g_load_sc esz=3 msz=3 u=1
|
||||
|
||||
# LD1Q
|
||||
LD1_zprz 1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 \
|
||||
&rprr_gather_load u=0 ff=0 xs=2 esz=4 msz=4 scale=0
|
||||
|
||||
# SVE 64-bit gather load (vector plus immediate)
|
||||
LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
|
||||
@rpri_g_load esz=3
|
||||
@ -1245,8 +1367,20 @@ STR_zri 1110010 11 0. ..... 010 ... ..... ..... @rd_rn_i9
|
||||
|
||||
# SVE contiguous store (scalar plus immediate)
|
||||
# ST1B, ST1H, ST1W, ST1D; require msz <= esz
|
||||
ST_zpri 1110010 .. esz:2 0.... 111 ... ..... ..... \
|
||||
@rpri_store_msz nreg=0
|
||||
ST_zpri 1110010 00 esz:2 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=0 nreg=0
|
||||
ST_zpri 1110010 01 esz:2 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=1 nreg=0
|
||||
ST_zpri 1110010 10 10 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=2 esz=2 nreg=0
|
||||
ST_zpri 1110010 10 11 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=2 esz=3 nreg=0
|
||||
ST_zpri 1110010 11 11 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=3 esz=3 nreg=0
|
||||
ST_zpri 1110010 10 00 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=2 esz=4 nreg=0
|
||||
ST_zpri 1110010 11 10 0.... 111 ... ..... ..... \
|
||||
@rpri_store msz=3 esz=4 nreg=0
|
||||
|
||||
# SVE contiguous store (scalar plus scalar)
|
||||
# ST1B, ST1H, ST1W, ST1D; require msz <= esz
|
||||
@ -1255,20 +1389,40 @@ ST_zprr 1110010 00 .. ..... 010 ... ..... ..... \
|
||||
@rprr_store_esz_n0 msz=0
|
||||
ST_zprr 1110010 01 .. ..... 010 ... ..... ..... \
|
||||
@rprr_store_esz_n0 msz=1
|
||||
ST_zprr 1110010 10 .. ..... 010 ... ..... ..... \
|
||||
@rprr_store_esz_n0 msz=2
|
||||
ST_zprr 1110010 10 10 ..... 010 ... ..... ..... \
|
||||
@rprr_store msz=2 esz=2 nreg=0
|
||||
ST_zprr 1110010 10 11 ..... 010 ... ..... ..... \
|
||||
@rprr_store msz=2 esz=3 nreg=0
|
||||
ST_zprr 1110010 11 11 ..... 010 ... ..... ..... \
|
||||
@rprr_store msz=3 esz=3 nreg=0
|
||||
ST_zprr 1110010 10 00 ..... 010 ... ..... ..... \
|
||||
@rprr_store msz=2 esz=4 nreg=0
|
||||
ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \
|
||||
@rprr_store msz=3 esz=4 nreg=0
|
||||
|
||||
# SVE contiguous non-temporal store (scalar plus immediate) (nreg == 0)
|
||||
# SVE store multiple structures (scalar plus immediate) (nreg != 0)
|
||||
ST_zpri 1110010 .. nreg:2 1.... 111 ... ..... ..... \
|
||||
@rpri_store_msz esz=%size_23
|
||||
@rpri_store msz=%size_23 esz=%size_23
|
||||
# ST[234]Q
|
||||
ST_zpri 11100100 01 00 .... 000 ... ..... ..... \
|
||||
@rpri_store msz=4 esz=4 nreg=1
|
||||
ST_zpri 11100100 10 00 .... 000 ... ..... ..... \
|
||||
@rpri_store msz=4 esz=4 nreg=2
|
||||
ST_zpri 11100100 11 00 .... 000 ... ..... ..... \
|
||||
@rpri_store msz=4 esz=4 nreg=3
|
||||
|
||||
# SVE contiguous non-temporal store (scalar plus scalar) (nreg == 0)
|
||||
# SVE store multiple structures (scalar plus scalar) (nreg != 0)
|
||||
ST_zprr 1110010 msz:2 nreg:2 ..... 011 ... ..... ..... \
|
||||
@rprr_store esz=%size_23
|
||||
ST_zprr 1110010 .. nreg:2 ..... 011 ... ..... ..... \
|
||||
@rprr_store msz=%size_23 esz=%size_23
|
||||
# ST[234]Q
|
||||
ST_zprr 11100100 01 1 ..... 000 ... ..... ..... \
|
||||
@rprr_store msz=4 esz=4 nreg=1
|
||||
ST_zprr 11100100 10 1 ..... 000 ... ..... ..... \
|
||||
@rprr_store msz=4 esz=4 nreg=2
|
||||
ST_zprr 11100100 11 1 ..... 000 ... ..... ..... \
|
||||
@rprr_store msz=4 esz=4 nreg=3
|
||||
|
||||
# SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
|
||||
# Require msz > 0 && msz <= esz.
|
||||
@ -1293,6 +1447,10 @@ ST1_zprz 1110010 .. 01 ..... 101 ... ..... ..... \
|
||||
ST1_zprz 1110010 .. 00 ..... 101 ... ..... ..... \
|
||||
@rprr_scatter_store xs=2 esz=3 scale=0
|
||||
|
||||
# ST1Q
|
||||
ST1_zprz 1110 0100 001 rm:5 001 pg:3 rn:5 rd:5 \
|
||||
&rprr_scatter_store xs=2 msz=4 esz=4 scale=0
|
||||
|
||||
# SVE 64-bit scatter store (vector plus immediate)
|
||||
ST1_zpiz 1110010 .. 10 ..... 101 ... ..... ..... \
|
||||
@rpri_scatter_store esz=3
|
||||
@ -1450,9 +1608,9 @@ EORTB 01000101 .. 0 ..... 10010 1 ..... ..... @rd_rn_rm
|
||||
|
||||
## SVE integer matrix multiply accumulate
|
||||
|
||||
SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
|
||||
USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
|
||||
UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
|
||||
SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
## SVE2 bitwise permute
|
||||
|
||||
@ -1504,13 +1662,22 @@ UABA 01000101 .. 0 ..... 11111 1 ..... ..... @rd_rn_rm
|
||||
#### SVE2 Narrowing
|
||||
|
||||
## SVE2 saturating extract narrow
|
||||
|
||||
# Bits 23, 18-16 are zero, limited in the translator via esz < 3 & imm == 0.
|
||||
SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl
|
||||
|
||||
{
|
||||
SQCVTN_sh 01000101 00 1 10001 010 000 ....0 ..... @rd_rnx2 esz=1
|
||||
SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl
|
||||
}
|
||||
SQXTNT 01000101 .. 1 ..... 010 001 ..... ..... @rd_rn_tszimm_shl
|
||||
UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl
|
||||
{
|
||||
UQCVTN_sh 01000101 00 1 10001 010 010 ....0 ..... @rd_rnx2 esz=1
|
||||
UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl
|
||||
}
|
||||
UQXTNT 01000101 .. 1 ..... 010 011 ..... ..... @rd_rn_tszimm_shl
|
||||
SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl
|
||||
{
|
||||
SQCVTUN_sh 01000101 00 1 10001 010 100 ....0 ..... @rd_rnx2 esz=1
|
||||
SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl
|
||||
}
|
||||
SQXTUNT 01000101 .. 1 ..... 010 101 ..... ..... @rd_rn_tszimm_shl
|
||||
|
||||
## SVE2 bitwise shift right narrow
|
||||
@ -1597,14 +1764,17 @@ UMLSLT_zzzw 01000100 .. 0 ..... 010 111 ..... ..... @rda_rn_rm
|
||||
CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
||||
SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx
|
||||
|
||||
## SVE mixed sign dot product
|
||||
## SVE dot product
|
||||
|
||||
USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm
|
||||
SDOT_zzzz_2s 01000100 00 0 ..... 110 010 ..... ..... @rda_rn_rm_ex esz=2
|
||||
UDOT_zzzz_2s 01000100 00 0 ..... 110 011 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
USDOT_zzzz_4s 01000100 10 0 ..... 011 110 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
### SVE2 floating point matrix multiply accumulate
|
||||
BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
|
||||
FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
|
||||
FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
|
||||
BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=1
|
||||
FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=2
|
||||
FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_ex esz=3
|
||||
|
||||
### SVE2 Memory Gather Load Group
|
||||
|
||||
@ -1654,26 +1824,35 @@ FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0
|
||||
FLOGB 01100101 00 011 esz:2 0101 pg:3 rn:5 rd:5 &rpr_esz
|
||||
|
||||
### SVE2 floating-point multiply-add long (vectors)
|
||||
FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
|
||||
FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
|
||||
FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0
|
||||
FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0
|
||||
FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2
|
||||
FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
|
||||
BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
|
||||
BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_ex esz=2
|
||||
BFMLSLB_zzzw 01100100 11 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
BFMLSLT_zzzw 01100100 11 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
### SVE2 floating-point bfloat16 dot-product
|
||||
BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
|
||||
### SVE2 floating-point dot-product
|
||||
FDOT_zzzz 01100100 00 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_ex esz=2
|
||||
|
||||
### SVE2 floating-point multiply-add long (indexed)
|
||||
|
||||
FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
|
||||
FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
|
||||
FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
|
||||
FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
|
||||
|
||||
BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
|
||||
BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
|
||||
BFMLSLB_zzxw 01100100 11 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
|
||||
BFMLSLT_zzxw 01100100 11 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
|
||||
|
||||
### SVE2 floating-point bfloat16 dot-product (indexed)
|
||||
### SVE2 floating-point dot-product (indexed)
|
||||
|
||||
FDOT_zzxz 01100100 00 1 ..... 010000 ..... ..... @rrxr_2 esz=2
|
||||
BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
|
||||
|
||||
### SVE broadcast predicate element
|
||||
@ -1700,3 +1879,55 @@ PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
|
||||
|
||||
SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
|
||||
UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
|
||||
|
||||
FCLAMP 01100100 .. 1 ..... 001001 ..... ..... @rda_rn_rm
|
||||
|
||||
### SVE2p1 multi-vec contiguous load
|
||||
|
||||
&zcrr_ldst rd png rn rm esz nreg
|
||||
&zcri_ldst rd png rn imm esz nreg
|
||||
%png 10:3 !function=plus_8
|
||||
%zd_ax2 1:4 !function=times_2
|
||||
%zd_ax4 2:3 !function=times_4
|
||||
|
||||
LD1_zcrr 10100000000 rm:5 0 esz:2 ... rn:5 .... - \
|
||||
&zcrr_ldst %png rd=%zd_ax2 nreg=2
|
||||
LD1_zcrr 10100000000 rm:5 1 esz:2 ... rn:5 ... 0- \
|
||||
&zcrr_ldst %png rd=%zd_ax4 nreg=4
|
||||
|
||||
ST1_zcrr 10100000001 rm:5 0 esz:2 ... rn:5 .... - \
|
||||
&zcrr_ldst %png rd=%zd_ax2 nreg=2
|
||||
ST1_zcrr 10100000001 rm:5 1 esz:2 ... rn:5 ... 0- \
|
||||
&zcrr_ldst %png rd=%zd_ax4 nreg=4
|
||||
|
||||
LD1_zcri 101000000100 imm:s4 0 esz:2 ... rn:5 .... - \
|
||||
&zcri_ldst %png rd=%zd_ax2 nreg=2
|
||||
LD1_zcri 101000000100 imm:s4 1 esz:2 ... rn:5 ... 0- \
|
||||
&zcri_ldst %png rd=%zd_ax4 nreg=4
|
||||
|
||||
ST1_zcri 101000000110 imm:s4 0 esz:2 ... rn:5 .... - \
|
||||
&zcri_ldst %png rd=%zd_ax2 nreg=2
|
||||
ST1_zcri 101000000110 imm:s4 1 esz:2 ... rn:5 ... 0- \
|
||||
&zcri_ldst %png rd=%zd_ax4 nreg=4
|
||||
|
||||
# Note: N bit and 0 bit (for nreg4) still mashed in rd.
|
||||
# This is handled within gen_ldst_c().
|
||||
LD1_zcrr_stride 10100001000 rm:5 0 esz:2 ... rn:5 rd:5 \
|
||||
&zcrr_ldst %png nreg=2
|
||||
LD1_zcrr_stride 10100001000 rm:5 1 esz:2 ... rn:5 rd:5 \
|
||||
&zcrr_ldst %png nreg=4
|
||||
|
||||
ST1_zcrr_stride 10100001001 rm:5 0 esz:2 ... rn:5 rd:5 \
|
||||
&zcrr_ldst %png nreg=2
|
||||
ST1_zcrr_stride 10100001001 rm:5 1 esz:2 ... rn:5 rd:5 \
|
||||
&zcrr_ldst %png nreg=4
|
||||
|
||||
LD1_zcri_stride 101000010100 imm:s4 0 esz:2 ... rn:5 rd:5 \
|
||||
&zcri_ldst %png nreg=2
|
||||
LD1_zcri_stride 101000010100 imm:s4 1 esz:2 ... rn:5 rd:5 \
|
||||
&zcri_ldst %png nreg=4
|
||||
|
||||
ST1_zcri_stride 101000010110 imm:s4 0 esz:2 ... rn:5 rd:5 \
|
||||
&zcri_ldst %png nreg=2
|
||||
ST1_zcri_stride 101000010110 imm:s4 1 esz:2 ... rn:5 rd:5 \
|
||||
&zcri_ldst %png nreg=4
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -116,6 +116,94 @@ DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
|
||||
DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
|
||||
DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
|
||||
|
||||
#define DO_LD_PRIM_3(NAME, FUNC) \
|
||||
static inline void sve_##NAME##_host(void *vd, \
|
||||
intptr_t reg_off, void *host) \
|
||||
{ sve_##FUNC##_host(vd, reg_off, host); \
|
||||
*(uint64_t *)(vd + reg_off + 8) = 0; } \
|
||||
static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
|
||||
intptr_t reg_off, target_ulong addr, uintptr_t ra) \
|
||||
{ sve_##FUNC##_tlb(env, vd, reg_off, addr, ra); \
|
||||
*(uint64_t *)(vd + reg_off + 8) = 0; }
|
||||
|
||||
DO_LD_PRIM_3(ld1squ_be, ld1sdu_be)
|
||||
DO_LD_PRIM_3(ld1squ_le, ld1sdu_le)
|
||||
DO_LD_PRIM_3(ld1dqu_be, ld1dd_be)
|
||||
DO_LD_PRIM_3(ld1dqu_le, ld1dd_le)
|
||||
|
||||
#define sve_st1sq_be_host sve_st1sd_be_host
|
||||
#define sve_st1sq_le_host sve_st1sd_le_host
|
||||
#define sve_st1sq_be_tlb sve_st1sd_be_tlb
|
||||
#define sve_st1sq_le_tlb sve_st1sd_le_tlb
|
||||
|
||||
#define sve_st1dq_be_host sve_st1dd_be_host
|
||||
#define sve_st1dq_le_host sve_st1dd_le_host
|
||||
#define sve_st1dq_be_tlb sve_st1dd_be_tlb
|
||||
#define sve_st1dq_le_tlb sve_st1dd_le_tlb
|
||||
|
||||
/*
|
||||
* The ARMVectorReg elements are stored in host-endian 64-bit units.
|
||||
* For 128-bit quantities, the sequence defined by the Elem[] pseudocode
|
||||
* corresponds to storing the two 64-bit pieces in little-endian order.
|
||||
*/
|
||||
/* FIXME: Nothing in this file makes any effort at atomicity. */
|
||||
|
||||
static inline void sve_ld1qq_be_host(void *vd, intptr_t reg_off, void *host)
|
||||
{
|
||||
sve_ld1dd_be_host(vd, reg_off + 8, host);
|
||||
sve_ld1dd_be_host(vd, reg_off, host + 8);
|
||||
}
|
||||
|
||||
static inline void sve_ld1qq_le_host(void *vd, intptr_t reg_off, void *host)
|
||||
{
|
||||
sve_ld1dd_le_host(vd, reg_off, host);
|
||||
sve_ld1dd_le_host(vd, reg_off + 8, host + 8);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sve_ld1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
|
||||
target_ulong addr, uintptr_t ra)
|
||||
{
|
||||
sve_ld1dd_be_tlb(env, vd, reg_off + 8, addr, ra);
|
||||
sve_ld1dd_be_tlb(env, vd, reg_off, addr + 8, ra);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sve_ld1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
|
||||
target_ulong addr, uintptr_t ra)
|
||||
{
|
||||
sve_ld1dd_le_tlb(env, vd, reg_off, addr, ra);
|
||||
sve_ld1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra);
|
||||
}
|
||||
|
||||
static inline void sve_st1qq_be_host(void *vd, intptr_t reg_off, void *host)
|
||||
{
|
||||
sve_st1dd_be_host(vd, reg_off + 8, host);
|
||||
sve_st1dd_be_host(vd, reg_off, host + 8);
|
||||
}
|
||||
|
||||
static inline void sve_st1qq_le_host(void *vd, intptr_t reg_off, void *host)
|
||||
{
|
||||
sve_st1dd_le_host(vd, reg_off, host);
|
||||
sve_st1dd_le_host(vd, reg_off + 8, host + 8);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sve_st1qq_be_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
|
||||
target_ulong addr, uintptr_t ra)
|
||||
{
|
||||
sve_st1dd_be_tlb(env, vd, reg_off + 8, addr, ra);
|
||||
sve_st1dd_be_tlb(env, vd, reg_off, addr + 8, ra);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sve_st1qq_le_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
|
||||
target_ulong addr, uintptr_t ra)
|
||||
{
|
||||
sve_st1dd_le_tlb(env, vd, reg_off, addr, ra);
|
||||
sve_st1dd_le_tlb(env, vd, reg_off + 8, addr + 8, ra);
|
||||
}
|
||||
|
||||
#undef DO_LD_TLB
|
||||
#undef DO_ST_TLB
|
||||
#undef DO_LD_HOST
|
||||
@ -123,6 +211,7 @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
|
||||
#undef DO_ST_PRIM_1
|
||||
#undef DO_LD_PRIM_2
|
||||
#undef DO_ST_PRIM_2
|
||||
#undef DO_LD_PRIM_3
|
||||
|
||||
/*
|
||||
* Resolve the guest virtual address to info->host and info->flags.
|
||||
|
@ -1381,11 +1381,8 @@ static bool fp_access_check_only(DisasContext *s)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool fp_access_check(DisasContext *s)
|
||||
static bool nonstreaming_check(DisasContext *s)
|
||||
{
|
||||
if (!fp_access_check_only(s)) {
|
||||
return false;
|
||||
}
|
||||
if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
|
||||
gen_exception_insn(s, 0, EXCP_UDEF,
|
||||
syn_smetrap(SME_ET_Streaming, false));
|
||||
@ -1394,6 +1391,11 @@ static bool fp_access_check(DisasContext *s)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool fp_access_check(DisasContext *s)
|
||||
{
|
||||
return fp_access_check_only(s) && nonstreaming_check(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return <0 for non-supported element sizes, with MO_16 controlled by
|
||||
* FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
|
||||
@ -1444,14 +1446,24 @@ static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
|
||||
*/
|
||||
bool sve_access_check(DisasContext *s)
|
||||
{
|
||||
if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
|
||||
if (dc_isar_feature(aa64_sme, s)) {
|
||||
bool ret;
|
||||
|
||||
assert(dc_isar_feature(aa64_sme, s));
|
||||
ret = sme_sm_enabled_check(s);
|
||||
if (s->pstate_sm) {
|
||||
ret = sme_enabled_check(s);
|
||||
} else if (dc_isar_feature(aa64_sve, s)) {
|
||||
goto continue_sve;
|
||||
} else {
|
||||
ret = sme_sm_enabled_check(s);
|
||||
}
|
||||
if (ret) {
|
||||
ret = nonstreaming_check(s);
|
||||
}
|
||||
s->sve_access_checked = (ret ? 1 : -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
continue_sve:
|
||||
if (s->sve_excp_el) {
|
||||
/* Assert that we only raise one exception per instruction. */
|
||||
assert(!s->sve_access_checked);
|
||||
@ -1488,7 +1500,8 @@ bool sme_enabled_check(DisasContext *s)
|
||||
* to be zero when fp_excp_el has priority. This is because we need
|
||||
* sme_excp_el by itself for cpregs access checks.
|
||||
*/
|
||||
if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
|
||||
if (s->sme_excp_el
|
||||
&& (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) {
|
||||
bool ret = sme_access_check(s);
|
||||
s->fp_access_checked = (ret ? 1 : -1);
|
||||
return ret;
|
||||
@ -6101,9 +6114,9 @@ static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
|
||||
return true;
|
||||
}
|
||||
|
||||
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
|
||||
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
|
||||
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
|
||||
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b)
|
||||
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b)
|
||||
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b)
|
||||
TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
|
||||
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
|
||||
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
|
||||
@ -6863,12 +6876,12 @@ static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
|
||||
return true;
|
||||
}
|
||||
|
||||
TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
|
||||
TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
|
||||
TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b)
|
||||
TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b)
|
||||
TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
||||
gen_helper_gvec_sudot_idx_b)
|
||||
gen_helper_gvec_sudot_idx_4b)
|
||||
TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
|
||||
gen_helper_gvec_usdot_idx_b)
|
||||
gen_helper_gvec_usdot_idx_4b)
|
||||
TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
|
||||
gen_helper_gvec_bfdot_idx)
|
||||
|
||||
@ -10126,8 +10139,10 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
|
||||
dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
|
||||
dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
|
||||
dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
|
||||
dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL);
|
||||
dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
|
||||
dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
|
||||
dc->max_svl = arm_cpu->sme_max_vq * 16;
|
||||
dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
|
||||
dc->bt = EX_TBFLAG_A64(tb_flags, BT);
|
||||
dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
|
||||
|
@ -225,7 +225,13 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
|
||||
void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
|
||||
void gen_gvec_sve2_sqdmulh(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs,
|
||||
int len, int rn, int imm, MemOp align);
|
||||
void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs,
|
||||
int len, int rn, int imm, MemOp align);
|
||||
|
||||
#endif /* TARGET_ARM_TRANSLATE_A64_H */
|
||||
|
@ -271,7 +271,7 @@ static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
|
||||
gen_helper_gvec_sdot_b);
|
||||
gen_helper_gvec_sdot_4b);
|
||||
}
|
||||
|
||||
static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
|
||||
@ -280,7 +280,7 @@ static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
|
||||
gen_helper_gvec_udot_b);
|
||||
gen_helper_gvec_udot_4b);
|
||||
}
|
||||
|
||||
static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
|
||||
@ -289,7 +289,7 @@ static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
|
||||
gen_helper_gvec_usdot_b);
|
||||
gen_helper_gvec_usdot_4b);
|
||||
}
|
||||
|
||||
static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
|
||||
@ -356,7 +356,7 @@ static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
||||
gen_helper_gvec_sdot_idx_b);
|
||||
gen_helper_gvec_sdot_idx_4b);
|
||||
}
|
||||
|
||||
static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
|
||||
@ -365,7 +365,7 @@ static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
||||
gen_helper_gvec_udot_idx_b);
|
||||
gen_helper_gvec_udot_idx_4b);
|
||||
}
|
||||
|
||||
static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
|
||||
@ -374,7 +374,7 @@ static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
||||
gen_helper_gvec_usdot_idx_b);
|
||||
gen_helper_gvec_usdot_idx_4b);
|
||||
}
|
||||
|
||||
static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
|
||||
@ -383,7 +383,7 @@ static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
|
||||
return false;
|
||||
}
|
||||
return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
|
||||
gen_helper_gvec_sudot_idx_b);
|
||||
gen_helper_gvec_sudot_idx_4b);
|
||||
}
|
||||
|
||||
static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
|
||||
@ -1010,8 +1010,8 @@ DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
|
||||
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
|
||||
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
|
||||
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
|
||||
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
|
||||
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
|
||||
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_nf_s, gen_helper_gvec_fmla_nf_h)
|
||||
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_nf_s, gen_helper_gvec_fmls_nf_h)
|
||||
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
|
||||
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
|
||||
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -70,8 +70,10 @@ typedef struct DisasContext {
|
||||
int fp_excp_el; /* FP exception EL or 0 if enabled */
|
||||
int sve_excp_el; /* SVE exception EL or 0 if enabled */
|
||||
int sme_excp_el; /* SME exception EL or 0 if enabled */
|
||||
int zt0_excp_el; /* ZT0 exception EL or 0 if enabled */
|
||||
int vl; /* current vector length in bytes */
|
||||
int svl; /* current streaming vector length in bytes */
|
||||
int max_svl; /* maximum implemented streaming vector length */
|
||||
bool vfp_enabled; /* FP enabled via FPSCR.EN */
|
||||
int vec_len;
|
||||
int vec_stride;
|
||||
@ -208,6 +210,11 @@ static inline int plus_2(DisasContext *s, int x)
|
||||
return x + 2;
|
||||
}
|
||||
|
||||
/* Return @x plus 8.  @s is accepted but not used by this function. */
static inline int plus_8(DisasContext *s, int x)
{
    int biased = x + 8;

    return biased;
}
|
||||
|
||||
static inline int plus_12(DisasContext *s, int x)
|
||||
{
|
||||
return x + 12;
|
||||
@ -636,6 +643,8 @@ typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
|
||||
uint32_t, uint32_t, uint32_t);
|
||||
typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
|
||||
uint32_t, uint32_t, uint32_t);
|
||||
typedef void GVecGen3FnVar(unsigned, TCGv_ptr, uint32_t, TCGv_ptr, uint32_t,
|
||||
TCGv_ptr, uint32_t, uint32_t, uint32_t);
|
||||
|
||||
/* Function prototype for gen_ functions for calling Neon helpers */
|
||||
typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32);
|
||||
|
@ -825,11 +825,11 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc)); \
|
||||
}
|
||||
|
||||
DO_DOT(gvec_sdot_b, int32_t, int8_t, int8_t)
|
||||
DO_DOT(gvec_udot_b, uint32_t, uint8_t, uint8_t)
|
||||
DO_DOT(gvec_usdot_b, uint32_t, uint8_t, int8_t)
|
||||
DO_DOT(gvec_sdot_h, int64_t, int16_t, int16_t)
|
||||
DO_DOT(gvec_udot_h, uint64_t, uint16_t, uint16_t)
|
||||
DO_DOT(gvec_sdot_4b, int32_t, int8_t, int8_t)
|
||||
DO_DOT(gvec_udot_4b, uint32_t, uint8_t, uint8_t)
|
||||
DO_DOT(gvec_usdot_4b, uint32_t, uint8_t, int8_t)
|
||||
DO_DOT(gvec_sdot_4h, int64_t, int16_t, int16_t)
|
||||
DO_DOT(gvec_udot_4h, uint64_t, uint16_t, uint16_t)
|
||||
|
||||
#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
|
||||
@ -865,12 +865,63 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc)); \
|
||||
}
|
||||
|
||||
DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4)
|
||||
DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4)
|
||||
DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4)
|
||||
DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4)
|
||||
DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8)
|
||||
DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8)
|
||||
DO_DOT_IDX(gvec_sdot_idx_4b, int32_t, int8_t, int8_t, H4)
|
||||
DO_DOT_IDX(gvec_udot_idx_4b, uint32_t, uint8_t, uint8_t, H4)
|
||||
DO_DOT_IDX(gvec_sudot_idx_4b, int32_t, int8_t, uint8_t, H4)
|
||||
DO_DOT_IDX(gvec_usdot_idx_4b, int32_t, uint8_t, int8_t, H4)
|
||||
DO_DOT_IDX(gvec_sdot_idx_4h, int64_t, int16_t, int16_t, H8)
|
||||
DO_DOT_IDX(gvec_udot_idx_4h, uint64_t, uint16_t, uint16_t, H8)
|
||||
|
||||
#undef DO_DOT
|
||||
#undef DO_DOT_IDX
|
||||
|
||||
/*
 * Similar for 2-way dot product: each TYPED output element is the
 * matching accumulator element plus the sum of the products of two
 * adjacent TYPEN/TYPEM input elements.
 */
#define DO_DOT(NAME, TYPED, TYPEN, TYPEM) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc)  \
{                                                                         \
    intptr_t i, opr_sz = simd_oprsz(desc);                                \
    TYPED *d = vd, *a = va;                                               \
    TYPEN *n = vn;                                                        \
    TYPEM *m = vm;                                                        \
    for (i = 0; i < opr_sz / sizeof(TYPED); ++i) {                        \
        /* Widen the first operand so each product is formed in TYPED. */ \
        TYPED p_even = (TYPED)n[i * 2 + 0] * m[i * 2 + 0];                \
        TYPED p_odd = (TYPED)n[i * 2 + 1] * m[i * 2 + 1];                 \
        d[i] = a[i] + p_even + p_odd;                                     \
    }                                                                     \
    clear_tail(d, opr_sz, simd_maxsz(desc));                              \
}
|
||||
|
||||
#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
|
||||
{ \
|
||||
intptr_t i = 0, opr_sz = simd_oprsz(desc); \
|
||||
intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \
|
||||
intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \
|
||||
intptr_t index = simd_data(desc); \
|
||||
TYPED *d = vd, *a = va; \
|
||||
TYPEN *n = vn; \
|
||||
TYPEM *m_indexed = (TYPEM *)vm + HD(index) * 2; \
|
||||
do { \
|
||||
TYPED m0 = m_indexed[i * 2 + 0]; \
|
||||
TYPED m1 = m_indexed[i * 2 + 1]; \
|
||||
do { \
|
||||
d[i] = (a[i] + \
|
||||
n[i * 2 + 0] * m0 + \
|
||||
n[i * 2 + 1] * m1); \
|
||||
} while (++i < segend); \
|
||||
segend = i + (16 / sizeof(TYPED)); \
|
||||
} while (i < opr_sz_n); \
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc)); \
|
||||
}
|
||||
|
||||
DO_DOT(gvec_sdot_2h, int32_t, int16_t, int16_t)
|
||||
DO_DOT(gvec_udot_2h, uint32_t, uint16_t, uint16_t)
|
||||
|
||||
DO_DOT_IDX(gvec_sdot_idx_2h, int32_t, int16_t, int16_t, H4)
|
||||
DO_DOT_IDX(gvec_udot_idx_2h, uint32_t, uint16_t, uint16_t, H4)
|
||||
|
||||
#undef DO_DOT
|
||||
#undef DO_DOT_IDX
|
||||
|
||||
void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
|
||||
float_status *fpst, uint32_t desc)
|
||||
@ -1419,10 +1470,12 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
||||
DO_3OP(gvec_fadd_h, float16_add, float16)
|
||||
DO_3OP(gvec_fadd_s, float32_add, float32)
|
||||
DO_3OP(gvec_fadd_d, float64_add, float64)
|
||||
DO_3OP(gvec_bfadd, bfloat16_add, bfloat16)
|
||||
|
||||
DO_3OP(gvec_fsub_h, float16_sub, float16)
|
||||
DO_3OP(gvec_fsub_s, float32_sub, float32)
|
||||
DO_3OP(gvec_fsub_d, float64_sub, float64)
|
||||
DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16)
|
||||
|
||||
DO_3OP(gvec_fmul_h, float16_mul, float16)
|
||||
DO_3OP(gvec_fmul_s, float32_mul, float32)
|
||||
@ -1515,6 +1568,13 @@ DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16)
|
||||
DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32)
|
||||
DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64)
|
||||
|
||||
DO_3OP(gvec_fmax_b16, bfloat16_max, bfloat16)
|
||||
DO_3OP(gvec_fmin_b16, bfloat16_min, bfloat16)
|
||||
DO_3OP(gvec_fmaxnum_b16, bfloat16_maxnum, bfloat16)
|
||||
DO_3OP(gvec_fminnum_b16, bfloat16_minnum, bfloat16)
|
||||
DO_3OP(gvec_ah_fmax_b16, helper_sme2_ah_fmax_b16, bfloat16)
|
||||
DO_3OP(gvec_ah_fmin_b16, helper_sme2_ah_fmin_b16, bfloat16)
|
||||
|
||||
#endif
|
||||
#undef DO_3OP
|
||||
|
||||
@ -1550,6 +1610,12 @@ static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2,
|
||||
return float16_muladd(op1, op2, dest, 0, stat);
|
||||
}
|
||||
|
||||
static bfloat16 bfloat16_muladd_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
return bfloat16_muladd(op1, op2, dest, 0, stat);
|
||||
}
|
||||
|
||||
static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
@ -1568,6 +1634,12 @@ static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
|
||||
return float16_muladd(float16_chs(op1), op2, dest, 0, stat);
|
||||
}
|
||||
|
||||
static bfloat16 bfloat16_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
return bfloat16_muladd(bfloat16_chs(op1), op2, dest, 0, stat);
|
||||
}
|
||||
|
||||
static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
@ -1586,6 +1658,12 @@ static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
|
||||
return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
|
||||
}
|
||||
|
||||
static bfloat16 bfloat16_ah_mulsub_f(bfloat16 dest, bfloat16 op1, bfloat16 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
return bfloat16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
|
||||
}
|
||||
|
||||
static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
|
||||
float_status *stat)
|
||||
{
|
||||
@ -1610,23 +1688,28 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
||||
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
||||
}
|
||||
|
||||
DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16)
|
||||
DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32)
|
||||
DO_MULADD(gvec_fmla_nf_h, float16_muladd_nf, float16)
|
||||
DO_MULADD(gvec_fmla_nf_s, float32_muladd_nf, float32)
|
||||
|
||||
DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16)
|
||||
DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
|
||||
DO_MULADD(gvec_fmls_nf_h, float16_mulsub_nf, float16)
|
||||
DO_MULADD(gvec_fmls_nf_s, float32_mulsub_nf, float32)
|
||||
|
||||
DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
|
||||
DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
|
||||
DO_MULADD(gvec_vfma_d, float64_muladd_f, float64)
|
||||
DO_MULADD(gvec_bfmla, bfloat16_muladd_f, bfloat16)
|
||||
|
||||
DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
|
||||
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
|
||||
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
|
||||
DO_MULADD(gvec_bfmls, bfloat16_mulsub_f, bfloat16)
|
||||
|
||||
DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
|
||||
DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
|
||||
DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
|
||||
DO_MULADD(gvec_ah_bfmls, bfloat16_ah_mulsub_f, bfloat16)
|
||||
|
||||
#undef DO_MULADD
|
||||
|
||||
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
|
||||
* For AdvSIMD, there is of course only one such vector segment.
|
||||
@ -1745,14 +1828,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
||||
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
|
||||
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
|
||||
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
|
||||
DO_FMLA_IDX(gvec_bfmla_idx, bfloat16, H2, 0, 0)
|
||||
|
||||
DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
|
||||
DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
|
||||
DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
|
||||
DO_FMLA_IDX(gvec_bfmls_idx, bfloat16, H2, INT16_MIN, 0)
|
||||
|
||||
DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
|
||||
DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
|
||||
DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
|
||||
DO_FMLA_IDX(gvec_ah_bfmls_idx, bfloat16, H2, 0, float_muladd_negate_product)
|
||||
|
||||
#undef DO_FMLA_IDX
|
||||
|
||||
@ -2184,7 +2270,8 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
||||
float_status *status = &env->vfp.fp_status[FPST_A64];
|
||||
bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||
float_status *status = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64];
|
||||
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||
int negx = 0, negf = 0;
|
||||
|
||||
@ -2267,8 +2354,9 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
|
||||
intptr_t i, j, oprsz = simd_oprsz(desc);
|
||||
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
||||
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
|
||||
float_status *status = &env->vfp.fp_status[FPST_A64];
|
||||
bool za = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 3, 3) * sizeof(float16);
|
||||
float_status *status = &env->vfp.fp_status[za ? FPST_ZA : FPST_A64];
|
||||
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||
int negx = 0, negf = 0;
|
||||
|
||||
@ -2989,31 +3077,62 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst)
|
||||
float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
|
||||
float_status *fpst, float_status *fpst_odd)
|
||||
{
|
||||
/*
|
||||
* Compare f16_dotadd() in sme_helper.c, but here we have
|
||||
* bfloat16 inputs. In particular that means that we do not
|
||||
* want the FPCR.FZ16 flush semantics, so we use the normal
|
||||
* float_status for the input handling here.
|
||||
*/
|
||||
float64 e1r = float32_to_float64(e1 << 16, fpst);
|
||||
float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst);
|
||||
float64 e2r = float32_to_float64(e2 << 16, fpst);
|
||||
float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst);
|
||||
float64 t64;
|
||||
float32 s1r = e1 << 16;
|
||||
float32 s1c = e1 & 0xffff0000u;
|
||||
float32 s2r = e2 << 16;
|
||||
float32 s2c = e2 & 0xffff0000u;
|
||||
float32 t32;
|
||||
|
||||
/*
|
||||
* The ARM pseudocode function FPDot performs both multiplies
|
||||
* and the add with a single rounding operation. Emulate this
|
||||
* by performing the first multiply in round-to-odd, then doing
|
||||
* the second multiply as fused multiply-add, and rounding to
|
||||
* float32 all in one step.
|
||||
*/
|
||||
t64 = float64_mul(e1r, e2r, fpst_odd);
|
||||
t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst);
|
||||
/* C.f. FPProcessNaNs4 */
|
||||
if (float32_is_any_nan(s1r) || float32_is_any_nan(s1c) ||
|
||||
float32_is_any_nan(s2r) || float32_is_any_nan(s2c)) {
|
||||
if (float32_is_signaling_nan(s1r, fpst)) {
|
||||
t32 = s1r;
|
||||
} else if (float32_is_signaling_nan(s1c, fpst)) {
|
||||
t32 = s1c;
|
||||
} else if (float32_is_signaling_nan(s2r, fpst)) {
|
||||
t32 = s2r;
|
||||
} else if (float32_is_signaling_nan(s2c, fpst)) {
|
||||
t32 = s2c;
|
||||
} else if (float32_is_any_nan(s1r)) {
|
||||
t32 = s1r;
|
||||
} else if (float32_is_any_nan(s1c)) {
|
||||
t32 = s1c;
|
||||
} else if (float32_is_any_nan(s2r)) {
|
||||
t32 = s2r;
|
||||
} else {
|
||||
t32 = s2c;
|
||||
}
|
||||
/*
|
||||
* FPConvertNaN(FPProcessNaN(t32)) will be done as part
|
||||
* of the final addition below.
|
||||
*/
|
||||
} else {
|
||||
/*
|
||||
* Compare f16_dotadd() in sme_helper.c, but here we have
|
||||
* bfloat16 inputs. In particular that means that we do not
|
||||
* want the FPCR.FZ16 flush semantics, so we use the normal
|
||||
* float_status for the input handling here.
|
||||
*/
|
||||
float64 e1r = float32_to_float64(s1r, fpst);
|
||||
float64 e1c = float32_to_float64(s1c, fpst);
|
||||
float64 e2r = float32_to_float64(s2r, fpst);
|
||||
float64 e2c = float32_to_float64(s2c, fpst);
|
||||
float64 t64;
|
||||
|
||||
/* This conversion is exact, because we've already rounded. */
|
||||
t32 = float64_to_float32(t64, fpst);
|
||||
/*
|
||||
* The ARM pseudocode function FPDot performs both multiplies
|
||||
* and the add with a single rounding operation. Emulate this
|
||||
* by performing the first multiply in round-to-odd, then doing
|
||||
* the second multiply as fused multiply-add, and rounding to
|
||||
* float32 all in one step.
|
||||
*/
|
||||
t64 = float64_mul(e1r, e2r, fpst_odd);
|
||||
t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst);
|
||||
|
||||
/* This conversion is exact, because we've already rounded. */
|
||||
t32 = float64_to_float32(t64, fpst);
|
||||
}
|
||||
|
||||
/* The final accumulation step is not fused. */
|
||||
return float32_add(sum, t32, fpst);
|
||||
@ -3070,6 +3189,45 @@ void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/*
 * Indexed bfloat16 dot product accumulating into float32 elements.
 *
 * @vd:   destination, written as float32 elements
 * @vn:   first of two source registers; the second one is read from
 *        sizeof(ARMVectorReg) bytes after @vn
 * @vm:   source of the indexed 32-bit operand, one per 4-element segment
 * @va:   float32 addend
 * @desc: simd descriptor; data bits [1:0] are the index into each
 *        segment of @vm, data bit 2 selects which 16-bit half of each
 *        32-bit lane of the @vn registers is used
 *
 * For every output element a 32-bit value is assembled with the selected
 * 16-bit element of the first register in the low half and that of the
 * second register in the high half; it is then passed to bfdotadd() or,
 * when is_ebf() reports true, to bfdotadd_ebf().
 */
void HELPER(sme2_bfvdot_idx)(void *vd, void *vn, void *vm,
                             void *va, CPUARMState *env, uint32_t desc)
{
    intptr_t opr_sz = simd_oprsz(desc);
    intptr_t idx = extract32(desc, SIMD_DATA_SHIFT, 2);
    intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
    intptr_t nelem = opr_sz / 4;
    intptr_t seg_elts = MIN(16 / 4, nelem);
    float32 *d = vd, *a = va;
    uint16_t *n0 = vn;
    uint16_t *n1 = vn + sizeof(ARMVectorReg);
    uint32_t *m = vm;
    float_status fpst, fpst_odd;
    bool ebf = is_ebf(env, &fpst, &fpst_odd);

    for (intptr_t seg = 0; seg < nelem; seg += seg_elts) {
        uint32_t m_idx = m[seg + H4(idx)];

        for (intptr_t e = 0; e < seg_elts; e++) {
            intptr_t half = H2(2 * (seg + e) + sel);
            intptr_t out = seg + H4(e);
            uint32_t nn = n0[half] | (n1[half] << 16);

            if (ebf) {
                d[out] = bfdotadd_ebf(a[out], nn, m_idx, &fpst, &fpst_odd);
            } else {
                d[out] = bfdotadd(a[out], nn, m_idx, &fpst);
            }
        }
    }
    clear_tail(d, opr_sz, simd_maxsz(desc));
}
|
||||
|
||||
void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va,
|
||||
CPUARMState *env, uint32_t desc)
|
||||
{
|
||||
@ -3146,44 +3304,76 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va,
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
static void do_bfmlal(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a,
|
||||
float_status *stat, uint32_t desc, int negx, int negf)
|
||||
{
|
||||
intptr_t i, opr_sz = simd_oprsz(desc);
|
||||
intptr_t sel = simd_data(desc);
|
||||
float32 *d = vd, *a = va;
|
||||
bfloat16 *n = vn, *m = vm;
|
||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||
|
||||
for (i = 0; i < opr_sz / 4; ++i) {
|
||||
float32 nn = n[H2(i * 2 + sel)] << 16;
|
||||
float32 nn = (negx ^ n[H2(i * 2 + sel)]) << 16;
|
||||
float32 mm = m[H2(i * 2 + sel)] << 16;
|
||||
d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat);
|
||||
d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], negf, stat);
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
|
||||
void *va, float_status *stat, uint32_t desc)
|
||||
void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal(vd, vn, vm, va, stat, desc, 0, 0);
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlsl)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal(vd, vn, vm, va, stat, desc, 0x8000, 0);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ah_bfmlsl)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product);
|
||||
}
|
||||
|
||||
static void do_bfmlal_idx(float32 *d, bfloat16 *n, bfloat16 *m, float32 *a,
|
||||
float_status *stat, uint32_t desc, int negx, int negf)
|
||||
{
|
||||
intptr_t i, j, opr_sz = simd_oprsz(desc);
|
||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3);
|
||||
intptr_t elements = opr_sz / 4;
|
||||
intptr_t eltspersegment = MIN(16 / 4, elements);
|
||||
float32 *d = vd, *a = va;
|
||||
bfloat16 *n = vn, *m = vm;
|
||||
|
||||
for (i = 0; i < elements; i += eltspersegment) {
|
||||
float32 m_idx = m[H2(2 * i + index)] << 16;
|
||||
|
||||
for (j = i; j < i + eltspersegment; j++) {
|
||||
float32 n_j = n[H2(2 * j + sel)] << 16;
|
||||
d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat);
|
||||
float32 n_j = (negx ^ n[H2(2 * j + sel)]) << 16;
|
||||
d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], negf, stat);
|
||||
}
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, 0);
|
||||
}
|
||||
|
||||
void HELPER(gvec_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0x8000, 0);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ah_bfmlsl_idx)(void *vd, void *vn, void *vm, void *va,
|
||||
float_status *stat, uint32_t desc)
|
||||
{
|
||||
do_bfmlal_idx(vd, vn, vm, va, stat, desc, 0, float_muladd_negate_product);
|
||||
}
|
||||
|
||||
#define DO_CLAMP(NAME, TYPE) \
|
||||
void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
|
||||
{ \
|
||||
@ -3253,3 +3443,91 @@ void HELPER(gvec_ursqrte_s)(void *vd, void *vn, uint32_t desc)
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/*
 * Table lookup producing byte elements.
 *
 * For each of @nreg destination registers (register r starts @dstride * r
 * bytes after @zd), fill @elements bytes.  Consecutive @isize-bit indexes
 * are pulled from @indexes starting at element @segbase; each index
 * selects the low 8 bits of the @tsize-bit wide entry in @table.
 */
static inline void do_lut_b(void *zd, uint64_t *indexes, uint64_t *table,
                            unsigned elements, unsigned segbase,
                            unsigned dstride, unsigned isize,
                            unsigned tsize, unsigned nreg)
{
    /* Bit offset of the next index; advances across register boundaries. */
    unsigned pos = segbase * isize;

    for (unsigned r = 0; r < nreg; ++r) {
        uint8_t *out = zd + dstride * r;

        for (unsigned e = 0; e < elements; ++e, pos += isize) {
            unsigned sel = extractn(indexes, pos, isize);

            out[H1(e)] = extractn(table, sel * tsize, 8);
        }
    }
}
|
||||
|
||||
/*
 * Table lookup producing 16-bit elements.
 *
 * For each of @nreg destination registers (register r starts @dstride * r
 * bytes after @zd), fill @elements halfwords.  Consecutive @isize-bit
 * indexes are pulled from @indexes starting at element @segbase; each
 * index selects the low 16 bits of the @tsize-bit wide entry in @table.
 */
static inline void do_lut_h(void *zd, uint64_t *indexes, uint64_t *table,
                            unsigned elements, unsigned segbase,
                            unsigned dstride, unsigned isize,
                            unsigned tsize, unsigned nreg)
{
    /* Bit offset of the next index; advances across register boundaries. */
    unsigned pos = segbase * isize;

    for (unsigned r = 0; r < nreg; ++r) {
        uint16_t *out = zd + dstride * r;

        for (unsigned e = 0; e < elements; ++e, pos += isize) {
            unsigned sel = extractn(indexes, pos, isize);

            out[H2(e)] = extractn(table, sel * tsize, 16);
        }
    }
}
|
||||
|
||||
/*
 * Table lookup producing 32-bit elements.
 *
 * For each of @nreg destination registers (register r starts @dstride * r
 * bytes after @zd), fill @elements words.  Consecutive @isize-bit indexes
 * are pulled from @indexes starting at element @segbase; each index
 * selects one whole uint32_t of @table.  @tsize is not used here; it is
 * kept so the signature parallels do_lut_b() and do_lut_h().
 */
static inline void do_lut_s(void *zd, uint64_t *indexes, uint32_t *table,
                            unsigned elements, unsigned segbase,
                            unsigned dstride, unsigned isize,
                            unsigned tsize, unsigned nreg)
{
    /* Bit offset of the next index; advances across register boundaries. */
    unsigned pos = segbase * isize;

    for (unsigned r = 0; r < nreg; ++r) {
        uint32_t *out = zd + dstride * r;

        for (unsigned e = 0; e < elements; ++e, pos += isize) {
            unsigned sel = extractn(indexes, pos, isize);

            out[H4(e)] = table[H4(sel)];
        }
    }
}
|
||||
|
||||
#define DO_SME2_LUT(ISIZE, NREG, SUFF, ESIZE) \
|
||||
void helper_sme2_luti##ISIZE##_##NREG##SUFF \
|
||||
(void *zd, void *zn, CPUARMState *env, uint32_t desc) \
|
||||
{ \
|
||||
unsigned vl = simd_oprsz(desc); \
|
||||
unsigned strided = extract32(desc, SIMD_DATA_SHIFT, 1); \
|
||||
unsigned idx = extract32(desc, SIMD_DATA_SHIFT + 1, 4); \
|
||||
unsigned elements = vl / ESIZE; \
|
||||
unsigned dstride = (!strided ? 1 : NREG == 4 ? 4 : 8); \
|
||||
unsigned segments = (ESIZE * 8) / (ISIZE * NREG); \
|
||||
unsigned segment = idx & (segments - 1); \
|
||||
ARMVectorReg indexes; \
|
||||
memcpy(&indexes, zn, vl); \
|
||||
do_lut_##SUFF(zd, indexes.d, (void *)env->za_state.zt0, elements, \
|
||||
segment * NREG * elements, \
|
||||
dstride * sizeof(ARMVectorReg), ISIZE, 32, NREG); \
|
||||
}
|
||||
|
||||
DO_SME2_LUT(2,1,b, 1)
|
||||
DO_SME2_LUT(2,1,h, 2)
|
||||
DO_SME2_LUT(2,1,s, 4)
|
||||
DO_SME2_LUT(2,2,b, 1)
|
||||
DO_SME2_LUT(2,2,h, 2)
|
||||
DO_SME2_LUT(2,2,s, 4)
|
||||
DO_SME2_LUT(2,4,b, 1)
|
||||
DO_SME2_LUT(2,4,h, 2)
|
||||
DO_SME2_LUT(2,4,s, 4)
|
||||
|
||||
DO_SME2_LUT(4,1,b, 1)
|
||||
DO_SME2_LUT(4,1,h, 2)
|
||||
DO_SME2_LUT(4,1,s, 4)
|
||||
DO_SME2_LUT(4,2,b, 1)
|
||||
DO_SME2_LUT(4,2,h, 2)
|
||||
DO_SME2_LUT(4,2,s, 4)
|
||||
DO_SME2_LUT(4,4,b, 1)
|
||||
DO_SME2_LUT(4,4,h, 2)
|
||||
DO_SME2_LUT(4,4,s, 4)
|
||||
|
||||
#undef DO_SME2_LUT
|
||||
|
@ -223,6 +223,34 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
|
||||
int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
|
||||
int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
|
||||
|
||||
/*
 * Clamp @val between a lower and an upper bound: the INT8/INT16/INT32
 * limits for do_ssat_{b,h,s}, and 0 up to the UINT8/UINT16/UINT32
 * maximum for do_usat_{b,h,s}.
 */
#define do_ssat_b(val)  MIN(MAX((val), INT8_MIN), INT8_MAX)
#define do_ssat_h(val)  MIN(MAX((val), INT16_MIN), INT16_MAX)
#define do_ssat_s(val)  MIN(MAX((val), INT32_MIN), INT32_MAX)
#define do_usat_b(val)  MIN(MAX((val), 0), UINT8_MAX)
#define do_usat_h(val)  MIN(MAX((val), 0), UINT16_MAX)
#define do_usat_s(val)  MIN(MAX((val), 0), UINT32_MAX)
|
||||
|
||||
/*
 * Unsigned rounding shift right: shift @x right by @sh bits and add the
 * last bit shifted out.
 *
 * @sh == 0 returns @x unchanged.  It is handled up front because the
 * rounding term is taken from bit (@sh - 1), which for an unsigned zero
 * would wrap to an out-of-range shift count.
 * @sh == 64 leaves only the rounding bit (bit 63 of @x); larger shifts
 * yield 0.
 */
static inline uint64_t do_urshr(uint64_t x, unsigned sh)
{
    if (sh == 0) {
        return x;
    }
    if (sh < 64) {
        return (x >> sh) + ((x >> (sh - 1)) & 1);
    }
    if (sh == 64) {
        return x >> 63;
    }
    return 0;
}
|
||||
|
||||
/*
 * Signed rounding shift right: shift @x right by @sh bits and add the
 * last bit shifted out.
 *
 * @sh == 0 returns @x unchanged.  It is handled up front because the
 * rounding term is taken from bit (@sh - 1), which for an unsigned zero
 * would wrap to an out-of-range shift count.
 * For @sh >= 64 the result is 0: rounding the sign bit always produces 0.
 */
static inline int64_t do_srshr(int64_t x, unsigned sh)
{
    if (sh == 0) {
        return x;
    }
    if (sh < 64) {
        return (x >> sh) + ((x >> (sh - 1)) & 1);
    }
    return 0;
}
|
||||
|
||||
/**
|
||||
* bfdotadd:
|
||||
* @sum: addend
|
||||
@ -272,6 +300,11 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
|
||||
/*
|
||||
* Negate as for FPCR.AH=1 -- do not negate NaNs.
|
||||
*/
|
||||
static inline float16 bfloat16_ah_chs(float16 a)
|
||||
{
|
||||
return bfloat16_is_any_nan(a) ? a : bfloat16_chs(a);
|
||||
}
|
||||
|
||||
static inline float16 float16_ah_chs(float16 a)
|
||||
{
|
||||
return float16_is_any_nan(a) ? a : float16_chs(a);
|
||||
@ -302,4 +335,119 @@ static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
|
||||
return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
|
||||
}
|
||||
|
||||
/* Not actually called directly as a helper, but uses similar machinery. */
|
||||
bfloat16 helper_sme2_ah_fmax_b16(bfloat16 a, bfloat16 b, float_status *fpst);
|
||||
bfloat16 helper_sme2_ah_fmin_b16(bfloat16 a, bfloat16 b, float_status *fpst);
|
||||
|
||||
float32 sve_f16_to_f32(float16 f, float_status *fpst);
|
||||
float16 sve_f32_to_f16(float32 f, float_status *fpst);
|
||||
|
||||
/*
|
||||
* Decode helper functions for predicate as counter.
|
||||
*/
|
||||
|
||||
/* Result of decode_counter(). */
typedef struct {
    unsigned count;         /* count, adjusted to the vector element size */
    unsigned lg2_stride;    /* log2 of the element stride; 0 unless adjusted */
    bool invert;            /* copy of bit 15 of the counter value */
} DecodeCounter;
|
||||
|
||||
static inline DecodeCounter
|
||||
decode_counter(unsigned png, unsigned vl, unsigned v_esz)
|
||||
{
|
||||
DecodeCounter ret = { };
|
||||
|
||||
/* C.f. Arm pseudocode CounterToPredicate. */
|
||||
if (likely(png & 0xf)) {
|
||||
unsigned p_esz = ctz32(png);
|
||||
|
||||
/*
|
||||
* maxbit = log2(pl(bits) * 4)
|
||||
* = log2(vl(bytes) * 4)
|
||||
* = log2(vl) + 2
|
||||
* maxbit_mask = ones<maxbit:0>
|
||||
* = (1 << (maxbit + 1)) - 1
|
||||
* = (1 << (log2(vl) + 2 + 1)) - 1
|
||||
* = (1 << (log2(vl) + 3)) - 1
|
||||
* = (pow2ceil(vl) << 3) - 1
|
||||
*/
|
||||
ret.count = png & (((unsigned)pow2ceil(vl) << 3) - 1);
|
||||
ret.count >>= p_esz + 1;
|
||||
|
||||
ret.invert = (png >> 15) & 1;
|
||||
|
||||
/*
|
||||
* The Arm pseudocode for CounterToPredicate expands the count to
|
||||
* a set of bits, and then the operation proceeds as for the original
|
||||
* interpretation of predicates as a set of bits.
|
||||
*
|
||||
* We can avoid the expansion by adjusting the count and supplying
|
||||
* an element stride.
|
||||
*/
|
||||
if (unlikely(p_esz != v_esz)) {
|
||||
if (p_esz < v_esz) {
|
||||
/*
|
||||
* For predicate esz < vector esz, the expanded predicate
|
||||
* will have more bits set than will be consumed.
|
||||
* Adjust the count down, rounding up.
|
||||
* Consider p_esz = MO_8, v_esz = MO_64, count 14:
|
||||
* The expanded predicate would be
|
||||
* 0011 1111 1111 1111
|
||||
* The significant bits are
|
||||
* ...1 ...1 ...1 ...1
|
||||
*/
|
||||
unsigned shift = v_esz - p_esz;
|
||||
unsigned trunc = ret.count >> shift;
|
||||
ret.count = trunc + (ret.count != (trunc << shift));
|
||||
} else {
|
||||
/*
|
||||
* For predicate esz > vector esz, the expanded predicate
|
||||
* will have bits set only at power-of-two multiples of
|
||||
* the vector esz. Bits at other multiples will all be
|
||||
* false. Adjust the count up, and supply the caller
|
||||
* with a stride of elements to skip.
|
||||
*/
|
||||
unsigned shift = p_esz - v_esz;
|
||||
ret.count <<= shift;
|
||||
ret.lg2_stride = shift;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Extract @len bits from an array of uint64_t at offset @pos bits.
 * The field may straddle two adjacent words; the second word is only
 * read in that case.
 */
static inline uint64_t extractn(uint64_t *p, unsigned pos, unsigned len)
{
    uint64_t *word = p + pos / 64;
    unsigned bit = pos % 64;
    uint64_t low = word[0];

    if (bit + len <= 64) {
        /* Entirely within one word. */
        return extract64(low, bit, len);
    }

    /* Align the field to bit 0 by merging in the low bits of the next word. */
    return extract64((low >> bit) | (word[1] << (-bit & 63)), 0, len);
}
|
||||
|
||||
/*
 * Deposit @len bits into an array of uint64_t at offset @pos bits.
 * A field that crosses a word boundary is written in two pieces: the
 * low bits of @val into the top of the first word, the remaining bits
 * into the bottom of the next one.
 */
static inline void depositn(uint64_t *p, unsigned pos,
                            unsigned len, uint64_t val)
{
    uint64_t *word = p + pos / 64;
    unsigned bit = pos % 64;
    unsigned room;

    if (bit + len <= 64) {
        word[0] = deposit64(word[0], bit, len, val);
        return;
    }

    /* Number of bits that still fit in the first word. */
    room = 64 - bit;
    word[0] = deposit64(word[0], bit, room, val);
    word[1] = deposit64(word[1], 0, len - room, val >> room);
}
|
||||
|
||||
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
||||
|
@ -123,7 +123,7 @@ uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
|
||||
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
|
||||
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
|
||||
/*
|
||||
* We do not merge in flags from FPST_AH or FPST_AH_F16, because
|
||||
* We do not merge in flags from FPST_{AH,ZA} or FPST_{AH,ZA}_F16, because
|
||||
* they are used for insns that must not set the cumulative exception bits.
|
||||
*/
|
||||
|
||||
@ -196,6 +196,8 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
|
||||
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
|
||||
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
|
||||
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA]);
|
||||
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_ZA_F16]);
|
||||
}
|
||||
if (changed & FPCR_FZ16) {
|
||||
bool ftz_enabled = val & FPCR_FZ16;
|
||||
@ -203,15 +205,18 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA_F16]);
|
||||
}
|
||||
if (changed & FPCR_FZ) {
|
||||
bool ftz_enabled = val & FPCR_FZ;
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
|
||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_ZA]);
|
||||
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
|
||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
|
||||
}
|
||||
@ -223,6 +228,7 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||
bool fitz_enabled = (val & FPCR_FIZ) ||
|
||||
(val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
|
||||
set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
|
||||
set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_ZA]);
|
||||
}
|
||||
if (changed & FPCR_DN) {
|
||||
bool dnan_enabled = val & FPCR_DN;
|
||||
@ -240,9 +246,13 @@ void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||
/* Change behaviours for A64 FP operations */
|
||||
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA]);
|
||||
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]);
|
||||
} else {
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA]);
|
||||
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_ZA_F16]);
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user