target/arm: Implement {LD1, ST1}{W, D} (128-bit element) for SVE2p1

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250704142112.1018902-96-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2025-07-04 08:20:58 -06:00 committed by Peter Maydell
parent 0af0c9bbb9
commit fc5f060bcb
5 changed files with 183 additions and 27 deletions

View File

@ -1678,9 +1678,15 @@ DEF_HELPER_FLAGS_4(sve_ld1hds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1squ_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1squ_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@ -1736,9 +1742,15 @@ DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1squ_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1dqu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1squ_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1dqu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@ -1946,6 +1958,11 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1dq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1dq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@ -1993,6 +2010,11 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1dq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1dq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,

View File

@ -1240,12 +1240,24 @@ LD1_zpiz 1000010 .. 01 ..... 1.. ... ..... ..... \
# SVE contiguous load (scalar plus scalar) # SVE contiguous load (scalar plus scalar)
LD_zprr 1010010 .... ..... 010 ... ..... ..... @rprr_load_dt nreg=0 LD_zprr 1010010 .... ..... 010 ... ..... ..... @rprr_load_dt nreg=0
# LD1W (128-bit element)
LD_zprr 1010010 1000 rm:5 100 pg:3 rn:5 rd:5 \
&rprr_load dtype=16 nreg=0
# LD1D (128-bit element)
LD_zprr 1010010 1100 rm:5 100 pg:3 rn:5 rd:5 \
&rprr_load dtype=17 nreg=0
# SVE contiguous first-fault load (scalar plus scalar) # SVE contiguous first-fault load (scalar plus scalar)
LDFF1_zprr 1010010 .... ..... 011 ... ..... ..... @rprr_load_dt nreg=0 LDFF1_zprr 1010010 .... ..... 011 ... ..... ..... @rprr_load_dt nreg=0
# SVE contiguous load (scalar plus immediate) # SVE contiguous load (scalar plus immediate)
LD_zpri 1010010 .... 0.... 101 ... ..... ..... @rpri_load_dt nreg=0 LD_zpri 1010010 .... 0.... 101 ... ..... ..... @rpri_load_dt nreg=0
# LD1W (128-bit element)
LD_zpri 1010010 1000 1 imm:s4 001 pg:3 rn:5 rd:5 \
&rpri_load dtype=16 nreg=0
# LD1D (128-bit element)
LD_zpri 1010010 1100 1 imm:s4 001 pg:3 rn:5 rd:5 \
&rpri_load dtype=17 nreg=0
# SVE contiguous non-fault load (scalar plus immediate) # SVE contiguous non-fault load (scalar plus immediate)
LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0
@ -1344,6 +1356,10 @@ ST_zpri 1110010 10 11 0.... 111 ... ..... ..... \
@rpri_store msz=2 esz=3 nreg=0 @rpri_store msz=2 esz=3 nreg=0
ST_zpri 1110010 11 11 0.... 111 ... ..... ..... \ ST_zpri 1110010 11 11 0.... 111 ... ..... ..... \
@rpri_store msz=3 esz=3 nreg=0 @rpri_store msz=3 esz=3 nreg=0
# ST1W, ST1D (128-bit element): msz=2 or 3 with esz=4
ST_zpri 1110010 10 00 0.... 111 ... ..... ..... \
@rpri_store msz=2 esz=4 nreg=0
ST_zpri 1110010 11 10 0.... 111 ... ..... ..... \
@rpri_store msz=3 esz=4 nreg=0
# SVE contiguous store (scalar plus scalar) # SVE contiguous store (scalar plus scalar)
# ST1B, ST1H, ST1W, ST1D; require msz <= esz # ST1B, ST1H, ST1W, ST1D; require msz <= esz
@ -1358,6 +1374,10 @@ ST_zprr 1110010 10 11 ..... 010 ... ..... ..... \
@rprr_store msz=2 esz=3 nreg=0 @rprr_store msz=2 esz=3 nreg=0
ST_zprr 1110010 11 11 ..... 010 ... ..... ..... \ ST_zprr 1110010 11 11 ..... 010 ... ..... ..... \
@rprr_store msz=3 esz=3 nreg=0 @rprr_store msz=3 esz=3 nreg=0
# ST1W, ST1D (128-bit element): msz=2 or 3 with esz=4
ST_zprr 1110010 10 00 ..... 010 ... ..... ..... \
@rprr_store msz=2 esz=4 nreg=0
ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \
@rprr_store msz=3 esz=4 nreg=0
# SVE contiguous non-temporal store (scalar plus immediate) (nreg == 0) # SVE contiguous non-temporal store (scalar plus immediate) (nreg == 0)
# SVE store multiple structures (scalar plus immediate) (nreg != 0) # SVE store multiple structures (scalar plus immediate) (nreg != 0)

View File

@ -6359,6 +6359,9 @@ DO_LD1_2(ld1sds, MO_64, MO_32)
DO_LD1_2(ld1dd, MO_64, MO_64) DO_LD1_2(ld1dd, MO_64, MO_64)
/*
 * LD1W / LD1D into 128-bit elements: each 16-byte vector element
 * receives one 32-bit (ld1squ) or 64-bit (ld1dqu) value from memory.
 *
 * The arguments follow the same order as the other invocations in this
 * list, e.g. DO_LD1_2(ld1sds, MO_64, MO_32): vector element size first,
 * memory access size second.  The element size must therefore be
 * MO_128 and the memory size MO_32 / MO_64, matching the store side
 * (DO_STN_2(1, sq, MO_128, MO_32)).
 */
DO_LD1_2(ld1squ, MO_128, MO_32)
DO_LD1_2(ld1dqu, MO_128, MO_64)
#undef DO_LD1_1 #undef DO_LD1_1
#undef DO_LD1_2 #undef DO_LD1_2
@ -6981,6 +6984,9 @@ DO_STN_2(2, dd, MO_64, MO_64)
DO_STN_2(3, dd, MO_64, MO_64) DO_STN_2(3, dd, MO_64, MO_64)
DO_STN_2(4, dd, MO_64, MO_64) DO_STN_2(4, dd, MO_64, MO_64)
DO_STN_2(1, sq, MO_128, MO_32)
DO_STN_2(1, dq, MO_128, MO_64)
#undef DO_STN_1 #undef DO_STN_1
#undef DO_STN_2 #undef DO_STN_2

View File

@ -116,6 +116,31 @@ DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
/*
 * Load primitives built on top of an existing primitive FUNC.
 *
 * For each NAME this defines sve_NAME_host() and sve_NAME_tlb() with the
 * same parameters as the sve_FUNC_host() / sve_FUNC_tlb() they wrap.
 * Each wrapper first calls the FUNC primitive for the element at
 * vd + reg_off, then stores zero to the uint64_t at vd + reg_off + 8.
 *
 * NOTE(review): this presumably relies on FUNC filling exactly the low
 * 8 bytes of the element, so that the result is a zero-extended 16-byte
 * (128-bit) element; the FUNC definitions are outside this view.
 */
#define DO_LD_PRIM_3(NAME, FUNC) \
static inline void sve_##NAME##_host(void *vd, \
intptr_t reg_off, void *host) \
{ sve_##FUNC##_host(vd, reg_off, host); \
*(uint64_t *)(vd + reg_off + 8) = 0; } \
static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
intptr_t reg_off, target_ulong addr, uintptr_t ra) \
{ sve_##FUNC##_tlb(env, vd, reg_off, addr, ra); \
*(uint64_t *)(vd + reg_off + 8) = 0; }
/* ld1squ wraps the ld1sdu primitives; ld1dqu wraps the ld1dd primitives. */
DO_LD_PRIM_3(ld1squ_be, ld1sdu_be)
DO_LD_PRIM_3(ld1squ_le, ld1sdu_le)
DO_LD_PRIM_3(ld1dqu_be, ld1dd_be)
DO_LD_PRIM_3(ld1dqu_le, ld1dd_le)
/*
 * The st1sq / st1dq store primitives are plain aliases of the st1sd /
 * st1dd primitives of the same endianness and access kind (host, tlb).
 *
 * NOTE(review): presumably no separate implementation is needed because
 * a store only reads the low 32 or 64 bits at the start of each
 * element, regardless of the element being 128 bits wide -- confirm
 * against the st1sd / st1dd definitions.
 */
#define sve_st1sq_be_host sve_st1sd_be_host
#define sve_st1sq_le_host sve_st1sd_le_host
#define sve_st1sq_be_tlb sve_st1sd_be_tlb
#define sve_st1sq_le_tlb sve_st1sd_le_tlb
#define sve_st1dq_be_host sve_st1dd_be_host
#define sve_st1dq_le_host sve_st1dd_le_host
#define sve_st1dq_be_tlb sve_st1dd_be_tlb
#define sve_st1dq_le_tlb sve_st1dd_le_tlb
#undef DO_LD_TLB #undef DO_LD_TLB
#undef DO_ST_TLB #undef DO_ST_TLB
#undef DO_LD_HOST #undef DO_LD_HOST
@ -123,6 +148,7 @@ DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
#undef DO_ST_PRIM_1 #undef DO_ST_PRIM_1
#undef DO_LD_PRIM_2 #undef DO_LD_PRIM_2
#undef DO_ST_PRIM_2 #undef DO_ST_PRIM_2
#undef DO_LD_PRIM_3
/* /*
* Resolve the guest virtual address to info->host and info->flags. * Resolve the guest virtual address to info->host and info->flags.

View File

@ -4817,21 +4817,25 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
*/ */
/* The memory mode of the dtype. */ /* The memory mode of the dtype. */
static const MemOp dtype_mop[16] = { static const MemOp dtype_mop[19] = {
MO_UB, MO_UB, MO_UB, MO_UB, MO_UB, MO_UB, MO_UB, MO_UB,
MO_SL, MO_UW, MO_UW, MO_UW, MO_SL, MO_UW, MO_UW, MO_UW,
MO_SW, MO_SW, MO_UL, MO_UL, MO_SW, MO_SW, MO_UL, MO_UL,
MO_SB, MO_SB, MO_SB, MO_UQ MO_SB, MO_SB, MO_SB, MO_UQ,
/* Artificial values used by decode */
MO_UL, MO_UQ, MO_128,
}; };
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE) #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
/* The vector element size of dtype. */ /* The vector element size of dtype. */
static const uint8_t dtype_esz[16] = { static const uint8_t dtype_esz[19] = {
0, 1, 2, 3, 0, 1, 2, 3,
3, 1, 2, 3, 3, 1, 2, 3,
3, 2, 2, 3, 3, 2, 2, 3,
3, 2, 1, 3 3, 2, 1, 3,
/* Artificial values used by decode */
4, 4, 4,
}; };
uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
@ -4882,7 +4886,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
} }
/* Indexed by [mte][be][dtype][nreg] */ /* Indexed by [mte][be][dtype][nreg] */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = {
{ /* mte inactive, little-endian */ { /* mte inactive, little-endian */
{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
@ -4906,7 +4910,11 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r },
{ gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL },
},
/* mte inactive, big-endian */ /* mte inactive, big-endian */
{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
@ -4931,7 +4939,12 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r },
{ gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL },
},
},
{ /* mte active, little-endian */ { /* mte active, little-endian */
{ { gen_helper_sve_ld1bb_r_mte, { { gen_helper_sve_ld1bb_r_mte,
@ -4964,7 +4977,11 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1dd_le_r_mte, { gen_helper_sve_ld1dd_le_r_mte,
gen_helper_sve_ld2dd_le_r_mte, gen_helper_sve_ld2dd_le_r_mte,
gen_helper_sve_ld3dd_le_r_mte, gen_helper_sve_ld3dd_le_r_mte,
gen_helper_sve_ld4dd_le_r_mte } }, gen_helper_sve_ld4dd_le_r_mte },
{ gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL },
},
/* mte active, big-endian */ /* mte active, big-endian */
{ { gen_helper_sve_ld1bb_r_mte, { { gen_helper_sve_ld1bb_r_mte,
@ -4997,7 +5014,12 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1dd_be_r_mte, { gen_helper_sve_ld1dd_be_r_mte,
gen_helper_sve_ld2dd_be_r_mte, gen_helper_sve_ld2dd_be_r_mte,
gen_helper_sve_ld3dd_be_r_mte, gen_helper_sve_ld3dd_be_r_mte,
gen_helper_sve_ld4dd_be_r_mte } } }, gen_helper_sve_ld4dd_be_r_mte },
{ gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL },
{ gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL },
},
},
}; };
static void do_ld_zpa(DisasContext *s, int zt, int pg, static void do_ld_zpa(DisasContext *s, int zt, int pg,
@ -5016,9 +5038,22 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg,
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{ {
if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { if (a->rm == 31) {
return false; return false;
} }
/* dtypes 16 and 17 are artificial, representing 128-bit element */
if (a->dtype < 16) {
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
} else {
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
}
if (sve_access_check(s)) { if (sve_access_check(s)) {
TCGv_i64 addr = tcg_temp_new_i64(); TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@ -5030,9 +5065,18 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{ {
/* dtypes 16 and 17 are artificial, representing 128-bit element */
if (a->dtype < 16) {
if (!dc_isar_feature(aa64_sve, s)) { if (!dc_isar_feature(aa64_sve, s)) {
return false; return false;
} }
} else {
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
}
if (sve_access_check(s)) { if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s); int vsz = vec_full_reg_size(s);
int elements = vsz >> dtype_esz[a->dtype]; int elements = vsz >> dtype_esz[a->dtype];
@ -5479,7 +5523,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
int msz, int esz, int nreg) int msz, int esz, int nreg)
{ {
static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { static gen_helper_gvec_mem * const fn_single[2][2][4][5] = {
{ { { gen_helper_sve_st1bb_r, { { { gen_helper_sve_st1bb_r,
gen_helper_sve_st1bh_r, gen_helper_sve_st1bh_r,
gen_helper_sve_st1bs_r, gen_helper_sve_st1bs_r,
@ -5490,9 +5534,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_le_r }, gen_helper_sve_st1hd_le_r },
{ NULL, NULL, { NULL, NULL,
gen_helper_sve_st1ss_le_r, gen_helper_sve_st1ss_le_r,
gen_helper_sve_st1sd_le_r }, gen_helper_sve_st1sd_le_r,
gen_helper_sve_st1sq_le_r, },
{ NULL, NULL, NULL, { NULL, NULL, NULL,
gen_helper_sve_st1dd_le_r } }, gen_helper_sve_st1dd_le_r,
gen_helper_sve_st1dq_le_r, } },
{ { gen_helper_sve_st1bb_r, { { gen_helper_sve_st1bb_r,
gen_helper_sve_st1bh_r, gen_helper_sve_st1bh_r,
gen_helper_sve_st1bs_r, gen_helper_sve_st1bs_r,
@ -5503,9 +5549,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_be_r }, gen_helper_sve_st1hd_be_r },
{ NULL, NULL, { NULL, NULL,
gen_helper_sve_st1ss_be_r, gen_helper_sve_st1ss_be_r,
gen_helper_sve_st1sd_be_r }, gen_helper_sve_st1sd_be_r,
gen_helper_sve_st1sq_be_r },
{ NULL, NULL, NULL, { NULL, NULL, NULL,
gen_helper_sve_st1dd_be_r } } }, gen_helper_sve_st1dd_be_r,
gen_helper_sve_st1dq_be_r } } },
{ { { gen_helper_sve_st1bb_r_mte, { { { gen_helper_sve_st1bb_r_mte,
gen_helper_sve_st1bh_r_mte, gen_helper_sve_st1bh_r_mte,
@ -5517,9 +5565,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_le_r_mte }, gen_helper_sve_st1hd_le_r_mte },
{ NULL, NULL, { NULL, NULL,
gen_helper_sve_st1ss_le_r_mte, gen_helper_sve_st1ss_le_r_mte,
gen_helper_sve_st1sd_le_r_mte }, gen_helper_sve_st1sd_le_r_mte,
gen_helper_sve_st1sq_le_r_mte },
{ NULL, NULL, NULL, { NULL, NULL, NULL,
gen_helper_sve_st1dd_le_r_mte } }, gen_helper_sve_st1dd_le_r_mte,
gen_helper_sve_st1dq_le_r_mte } },
{ { gen_helper_sve_st1bb_r_mte, { { gen_helper_sve_st1bb_r_mte,
gen_helper_sve_st1bh_r_mte, gen_helper_sve_st1bh_r_mte,
gen_helper_sve_st1bs_r_mte, gen_helper_sve_st1bs_r_mte,
@ -5530,9 +5580,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_be_r_mte }, gen_helper_sve_st1hd_be_r_mte },
{ NULL, NULL, { NULL, NULL,
gen_helper_sve_st1ss_be_r_mte, gen_helper_sve_st1ss_be_r_mte,
gen_helper_sve_st1sd_be_r_mte }, gen_helper_sve_st1sd_be_r_mte,
gen_helper_sve_st1sq_be_r_mte },
{ NULL, NULL, NULL, { NULL, NULL, NULL,
gen_helper_sve_st1dd_be_r_mte } } }, gen_helper_sve_st1dd_be_r_mte,
gen_helper_sve_st1dq_be_r_mte } } },
}; };
static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
{ { { gen_helper_sve_st2bb_r, { { { gen_helper_sve_st2bb_r,
@ -5601,12 +5653,27 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{ {
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
if (a->rm == 31 || a->msz > a->esz) { if (a->rm == 31 || a->msz > a->esz) {
return false; return false;
} }
switch (a->esz) {
case MO_8 ... MO_64:
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
break;
case MO_128:
assert(a->msz < a->esz);
assert(a->nreg == 0);
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
break;
default:
g_assert_not_reached();
}
if (sve_access_check(s)) { if (sve_access_check(s)) {
TCGv_i64 addr = tcg_temp_new_i64(); TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
@ -5618,12 +5685,27 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{ {
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
if (a->msz > a->esz) { if (a->msz > a->esz) {
return false; return false;
} }
switch (a->esz) {
case MO_8 ... MO_64:
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
break;
case MO_128:
assert(a->msz < a->esz);
assert(a->nreg == 0);
if (!dc_isar_feature(aa64_sve2p1, s)) {
return false;
}
s->is_nonstreaming = true;
break;
default:
g_assert_not_reached();
}
if (sve_access_check(s)) { if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s); int vsz = vec_full_reg_size(s);
int elements = vsz >> a->esz; int elements = vsz >> a->esz;