mirror of
				https://github.com/qemu/qemu.git
				synced 2025-10-31 12:07:31 +00:00 
			
		
		
		
	target/arm: Implement MVE VMLALDAV
Implement the MVE VMLALDAV insn, which multiplies pairs of integer elements, accumulating them into a 64-bit result in a pair of general-purpose registers. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210617121628.20116-20-peter.maydell@linaro.org
This commit is contained in:
		
							parent
							
								
									ac6ad1dca8
								
							
						
					
					
						commit
						1d2386f70a
					
				| @ -144,3 +144,11 @@ DEF_HELPER_FLAGS_4(mve_vmulltsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) | ||||
| DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) | ||||
| DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) | ||||
| DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr) | ||||
| 
 | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
| 
 | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavuh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
| DEF_HELPER_FLAGS_4(mve_vmlaldavuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64) | ||||
|  | ||||
| @ -130,3 +130,18 @@ VNEG_fp          1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op | ||||
| VDUP             1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0 | ||||
| VDUP             1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1 | ||||
| VDUP             1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2 | ||||
| 
 | ||||
| # multiply-add long dual accumulate | ||||
| # rdahi: bits [3:1] from insn, bit 0 is 1 | ||||
| # rdalo: bits [3:1] from insn, bit 0 is 0 | ||||
| %rdahi 20:3 !function=times_2_plus_1 | ||||
| %rdalo 13:3 !function=times_2 | ||||
| # size bit is 0 for 16 bit, 1 for 32 bit | ||||
| %size_16 16:1 !function=plus_1 | ||||
| 
 | ||||
| &vmlaldav rdahi rdalo size qn qm x a | ||||
| 
 | ||||
| @vmlaldav        .... .... . ... ... . ... . .... .... qm:3 . \ | ||||
|                  qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav | ||||
| VMLALDAV_S       1110 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav | ||||
| VMLALDAV_U       1111 1110 1 ... ... . ... x:1 1110 . 0 a:1 0 ... 0 @vmlaldav | ||||
|  | ||||
| @ -488,3 +488,37 @@ DO_2OP_S(vhadds, do_vhadd_s) | ||||
| DO_2OP_U(vhaddu, do_vhadd_u) | ||||
| DO_2OP_S(vhsubs, do_vhsub_s) | ||||
| DO_2OP_U(vhsubu, do_vhsub_u) | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * Multiply add long dual accumulate ops. | ||||
|  */ | ||||
| #define DO_LDAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC)                 \ | ||||
|     uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn,         \ | ||||
|                                     void *vm, uint64_t a)               \ | ||||
|     {                                                                   \ | ||||
|         uint16_t mask = mve_element_mask(env);                          \ | ||||
|         unsigned e;                                                     \ | ||||
|         TYPE *n = vn, *m = vm;                                          \ | ||||
|         for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {              \ | ||||
|             if (mask & 1) {                                             \ | ||||
|                 if (e & 1) {                                            \ | ||||
|                     a ODDACC                                            \ | ||||
|                         (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \ | ||||
|                 } else {                                                \ | ||||
|                     a EVENACC                                           \ | ||||
|                         (int64_t)n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \ | ||||
|                 }                                                       \ | ||||
|             }                                                           \ | ||||
|         }                                                               \ | ||||
|         mve_advance_vpt(env);                                           \ | ||||
|         return a;                                                       \ | ||||
|     } | ||||
| 
 | ||||
| DO_LDAV(vmlaldavsh, 2, int16_t, false, +=, +=) | ||||
| DO_LDAV(vmlaldavxsh, 2, int16_t, true, +=, +=) | ||||
| DO_LDAV(vmlaldavsw, 4, int32_t, false, +=, +=) | ||||
| DO_LDAV(vmlaldavxsw, 4, int32_t, true, +=, +=) | ||||
| 
 | ||||
| DO_LDAV(vmlaldavuh, 2, uint16_t, false, +=, +=) | ||||
| DO_LDAV(vmlaldavuw, 4, uint32_t, false, +=, +=) | ||||
|  | ||||
| @ -31,6 +31,7 @@ | ||||
| typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32); | ||||
| typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); | ||||
| typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr); | ||||
| typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64); | ||||
| 
 | ||||
| /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */ | ||||
| static inline long mve_qreg_offset(unsigned reg) | ||||
| @ -88,6 +89,22 @@ static void mve_update_eci(DisasContext *s) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool mve_skip_first_beat(DisasContext *s) | ||||
| { | ||||
|     /* Return true if PSR.ECI says we must skip the first beat of this insn */ | ||||
|     switch (s->eci) { | ||||
|     case ECI_NONE: | ||||
|         return false; | ||||
|     case ECI_A0: | ||||
|     case ECI_A0A1: | ||||
|     case ECI_A0A1A2: | ||||
|     case ECI_A0A1A2B0: | ||||
|         return true; | ||||
|     default: | ||||
|         g_assert_not_reached(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn) | ||||
| { | ||||
|     TCGv_i32 addr; | ||||
| @ -365,3 +382,82 @@ DO_2OP(VMULL_BS, vmullbs) | ||||
| DO_2OP(VMULL_BU, vmullbu) | ||||
| DO_2OP(VMULL_TS, vmullts) | ||||
| DO_2OP(VMULL_TU, vmulltu) | ||||
| 
 | ||||
| static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, | ||||
|                              MVEGenDualAccOpFn *fn) | ||||
| { | ||||
|     TCGv_ptr qn, qm; | ||||
|     TCGv_i64 rda; | ||||
|     TCGv_i32 rdalo, rdahi; | ||||
| 
 | ||||
|     if (!dc_isar_feature(aa32_mve, s) || | ||||
|         !mve_check_qreg_bank(s, a->qn | a->qm) || | ||||
|         !fn) { | ||||
|         return false; | ||||
|     } | ||||
|     /*
 | ||||
|      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related | ||||
|      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. | ||||
|      */ | ||||
|     if (a->rdahi == 13 || a->rdahi == 15) { | ||||
|         return false; | ||||
|     } | ||||
|     if (!mve_eci_check(s) || !vfp_access_check(s)) { | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     qn = mve_qreg_ptr(a->qn); | ||||
|     qm = mve_qreg_ptr(a->qm); | ||||
| 
 | ||||
|     /*
 | ||||
|      * This insn is subject to beat-wise execution. Partial execution | ||||
|      * of an A=0 (no-accumulate) insn which does not execute the first | ||||
|      * beat must start with the current rda value, not 0. | ||||
|      */ | ||||
|     if (a->a || mve_skip_first_beat(s)) { | ||||
|         rda = tcg_temp_new_i64(); | ||||
|         rdalo = load_reg(s, a->rdalo); | ||||
|         rdahi = load_reg(s, a->rdahi); | ||||
|         tcg_gen_concat_i32_i64(rda, rdalo, rdahi); | ||||
|         tcg_temp_free_i32(rdalo); | ||||
|         tcg_temp_free_i32(rdahi); | ||||
|     } else { | ||||
|         rda = tcg_const_i64(0); | ||||
|     } | ||||
| 
 | ||||
|     fn(rda, cpu_env, qn, qm, rda); | ||||
|     tcg_temp_free_ptr(qn); | ||||
|     tcg_temp_free_ptr(qm); | ||||
| 
 | ||||
|     rdalo = tcg_temp_new_i32(); | ||||
|     rdahi = tcg_temp_new_i32(); | ||||
|     tcg_gen_extrl_i64_i32(rdalo, rda); | ||||
|     tcg_gen_extrh_i64_i32(rdahi, rda); | ||||
|     store_reg(s, a->rdalo, rdalo); | ||||
|     store_reg(s, a->rdahi, rdahi); | ||||
|     tcg_temp_free_i64(rda); | ||||
|     mve_update_eci(s); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a) | ||||
| { | ||||
|     static MVEGenDualAccOpFn * const fns[4][2] = { | ||||
|         { NULL, NULL }, | ||||
|         { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh }, | ||||
|         { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw }, | ||||
|         { NULL, NULL }, | ||||
|     }; | ||||
|     return do_long_dual_acc(s, a, fns[a->size][a->x]); | ||||
| } | ||||
| 
 | ||||
| static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a) | ||||
| { | ||||
|     static MVEGenDualAccOpFn * const fns[4][2] = { | ||||
|         { NULL, NULL }, | ||||
|         { gen_helper_mve_vmlaldavuh, NULL }, | ||||
|         { gen_helper_mve_vmlaldavuw, NULL }, | ||||
|         { NULL, NULL }, | ||||
|     }; | ||||
|     return do_long_dual_acc(s, a, fns[a->size][a->x]); | ||||
| } | ||||
|  | ||||
| @ -136,6 +136,11 @@ static inline int negate(DisasContext *s, int x) | ||||
|     return -x; | ||||
| } | ||||
| 
 | ||||
| static inline int plus_1(DisasContext *s, int x) | ||||
| { | ||||
|     return x + 1; | ||||
| } | ||||
| 
 | ||||
| static inline int plus_2(DisasContext *s, int x) | ||||
| { | ||||
|     return x + 2; | ||||
| @ -151,6 +156,11 @@ static inline int times_4(DisasContext *s, int x) | ||||
|     return x * 4; | ||||
| } | ||||
| 
 | ||||
| static inline int times_2_plus_1(DisasContext *s, int x) | ||||
| { | ||||
|     return x * 2 + 1; | ||||
| } | ||||
| 
 | ||||
| static inline int arm_dc_feature(DisasContext *dc, int feature) | ||||
| { | ||||
|     return (dc->features & (1ULL << feature)) != 0; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Peter Maydell
						Peter Maydell