mirror of
https://github.com/qemu/qemu.git
synced 2025-08-15 05:06:56 +00:00
target/arm: Improve do_prewiden_3d
We can use proper widening loads to extend 32-bit inputs, and skip the "widenfn" step.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20201030022618.785675-12-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
9f1a5f93c2
commit
8aab18a2c5
@ -1788,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
|
|||||||
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
||||||
NeonGenWidenFn *widenfn,
|
NeonGenWidenFn *widenfn,
|
||||||
NeonGenTwo64OpFn *opfn,
|
NeonGenTwo64OpFn *opfn,
|
||||||
bool src1_wide)
|
int src1_mop, int src2_mop)
|
||||||
{
|
{
|
||||||
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
|
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
|
||||||
TCGv_i64 rn0_64, rn1_64, rm_64;
|
TCGv_i64 rn0_64, rn1_64, rm_64;
|
||||||
TCGv_i32 rm;
|
|
||||||
|
|
||||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||||
return false;
|
return false;
|
||||||
@ -1804,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!widenfn || !opfn) {
|
if (!opfn) {
|
||||||
/* size == 3 case, which is an entirely different insn group */
|
/* size == 3 case, which is an entirely different insn group */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
|
if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1821,40 +1820,48 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||||||
rn1_64 = tcg_temp_new_i64();
|
rn1_64 = tcg_temp_new_i64();
|
||||||
rm_64 = tcg_temp_new_i64();
|
rm_64 = tcg_temp_new_i64();
|
||||||
|
|
||||||
if (src1_wide) {
|
if (src1_mop >= 0) {
|
||||||
read_neon_element64(rn0_64, a->vn, 0, MO_64);
|
read_neon_element64(rn0_64, a->vn, 0, src1_mop);
|
||||||
} else {
|
} else {
|
||||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||||
read_neon_element32(tmp, a->vn, 0, MO_32);
|
read_neon_element32(tmp, a->vn, 0, MO_32);
|
||||||
widenfn(rn0_64, tmp);
|
widenfn(rn0_64, tmp);
|
||||||
tcg_temp_free_i32(tmp);
|
tcg_temp_free_i32(tmp);
|
||||||
}
|
}
|
||||||
rm = tcg_temp_new_i32();
|
if (src2_mop >= 0) {
|
||||||
read_neon_element32(rm, a->vm, 0, MO_32);
|
read_neon_element64(rm_64, a->vm, 0, src2_mop);
|
||||||
|
} else {
|
||||||
|
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||||
|
read_neon_element32(tmp, a->vm, 0, MO_32);
|
||||||
|
widenfn(rm_64, tmp);
|
||||||
|
tcg_temp_free_i32(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
widenfn(rm_64, rm);
|
|
||||||
tcg_temp_free_i32(rm);
|
|
||||||
opfn(rn0_64, rn0_64, rm_64);
|
opfn(rn0_64, rn0_64, rm_64);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Load second pass inputs before storing the first pass result, to
|
* Load second pass inputs before storing the first pass result, to
|
||||||
* avoid incorrect results if a narrow input overlaps with the result.
|
* avoid incorrect results if a narrow input overlaps with the result.
|
||||||
*/
|
*/
|
||||||
if (src1_wide) {
|
if (src1_mop >= 0) {
|
||||||
read_neon_element64(rn1_64, a->vn, 1, MO_64);
|
read_neon_element64(rn1_64, a->vn, 1, src1_mop);
|
||||||
} else {
|
} else {
|
||||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||||
read_neon_element32(tmp, a->vn, 1, MO_32);
|
read_neon_element32(tmp, a->vn, 1, MO_32);
|
||||||
widenfn(rn1_64, tmp);
|
widenfn(rn1_64, tmp);
|
||||||
tcg_temp_free_i32(tmp);
|
tcg_temp_free_i32(tmp);
|
||||||
}
|
}
|
||||||
rm = tcg_temp_new_i32();
|
if (src2_mop >= 0) {
|
||||||
read_neon_element32(rm, a->vm, 1, MO_32);
|
read_neon_element64(rm_64, a->vm, 1, src2_mop);
|
||||||
|
} else {
|
||||||
|
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||||
|
read_neon_element32(tmp, a->vm, 1, MO_32);
|
||||||
|
widenfn(rm_64, tmp);
|
||||||
|
tcg_temp_free_i32(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
write_neon_element64(rn0_64, a->vd, 0, MO_64);
|
write_neon_element64(rn0_64, a->vd, 0, MO_64);
|
||||||
|
|
||||||
widenfn(rm_64, rm);
|
|
||||||
tcg_temp_free_i32(rm);
|
|
||||||
opfn(rn1_64, rn1_64, rm_64);
|
opfn(rn1_64, rn1_64, rm_64);
|
||||||
write_neon_element64(rn1_64, a->vd, 1, MO_64);
|
write_neon_element64(rn1_64, a->vd, 1, MO_64);
|
||||||
|
|
||||||
@ -1865,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
|
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
|
||||||
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
|
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
|
||||||
{ \
|
{ \
|
||||||
static NeonGenWidenFn * const widenfn[] = { \
|
static NeonGenWidenFn * const widenfn[] = { \
|
||||||
gen_helper_neon_widen_##S##8, \
|
gen_helper_neon_widen_##S##8, \
|
||||||
gen_helper_neon_widen_##S##16, \
|
gen_helper_neon_widen_##S##16, \
|
||||||
tcg_gen_##EXT##_i32_i64, \
|
NULL, NULL, \
|
||||||
NULL, \
|
|
||||||
}; \
|
}; \
|
||||||
static NeonGenTwo64OpFn * const addfn[] = { \
|
static NeonGenTwo64OpFn * const addfn[] = { \
|
||||||
gen_helper_neon_##OP##l_u16, \
|
gen_helper_neon_##OP##l_u16, \
|
||||||
@ -1880,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||||||
tcg_gen_##OP##_i64, \
|
tcg_gen_##OP##_i64, \
|
||||||
NULL, \
|
NULL, \
|
||||||
}; \
|
}; \
|
||||||
return do_prewiden_3d(s, a, widenfn[a->size], \
|
int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
|
||||||
addfn[a->size], SRC1WIDE); \
|
return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
|
||||||
|
SRC1WIDE ? MO_Q : narrow_mop, \
|
||||||
|
narrow_mop); \
|
||||||
}
|
}
|
||||||
|
|
||||||
DO_PREWIDEN(VADDL_S, s, ext, add, false)
|
DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
|
||||||
DO_PREWIDEN(VADDL_U, u, extu, add, false)
|
DO_PREWIDEN(VADDL_U, u, add, false, 0)
|
||||||
DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
|
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
|
||||||
DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
|
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
|
||||||
DO_PREWIDEN(VADDW_S, s, ext, add, true)
|
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
|
||||||
DO_PREWIDEN(VADDW_U, u, extu, add, true)
|
DO_PREWIDEN(VADDW_U, u, add, true, 0)
|
||||||
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
|
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
|
||||||
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
|
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
|
||||||
|
|
||||||
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
|
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
|
||||||
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
|
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
|
||||||
|
@ -1183,6 +1183,12 @@ static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
|
|||||||
long off = neon_element_offset(reg, ele, memop);
|
long off = neon_element_offset(reg, ele, memop);
|
||||||
|
|
||||||
switch (memop) {
|
switch (memop) {
|
||||||
|
case MO_SL:
|
||||||
|
tcg_gen_ld32s_i64(dest, cpu_env, off);
|
||||||
|
break;
|
||||||
|
case MO_UL:
|
||||||
|
tcg_gen_ld32u_i64(dest, cpu_env, off);
|
||||||
|
break;
|
||||||
case MO_Q:
|
case MO_Q:
|
||||||
tcg_gen_ld_i64(dest, cpu_env, off);
|
tcg_gen_ld_i64(dest, cpu_env, off);
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user