From 70f897bdc4ce4101ec008317d43090f532bfb07d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 2 Oct 2015 21:09:54 +0000 Subject: [PATCH 01/10] tcg/ppc: Adjust exit_tb for change in prologue placement Changing the prologue to the beginning of the code_gen_buffer changes the direction of the "return" branch. Need to change the logic to match. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 92ef719e40..fd7a3e0425 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -1855,12 +1855,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, if (USE_REG_RA) { ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); - /* If we can use a direct branch, otherwise use the value in RA. - Note that the direct branch is always forward. If it's in - range now, it'll still be in range after the movi. Don't - bother about the 20 bytes where the test here fails but it - would succeed below. */ - if (!in_range_b(disp)) { + /* Use a direct branch if we can, otherwise use the value in RA. + Note that the direct branch is always backward, thus we need + to account for the possibility of 5 insns from the movi. */ + if (!in_range_b(disp - 20)) { tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); tcg_out32(s, BCCTR | BO_ALWAYS); From 5bfd75a35c11dd3aa61c73d0d2cd88137c31519c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 2 Oct 2015 22:25:28 +0000 Subject: [PATCH 02/10] tcg/ppc: Revise goto_tb implementation Restrict the size of code_gen_buffer to 2GB on ppc64, which lets us assert that everything is reachable with addis+addi from tb_ret_addr. This lets us use a max of 4 insns for goto_tb instead of 7. Emit the indirect branch portion of goto_tb up front, which means we only have to update two insns to update any link. With a 64-bit store, we can update the link atomically, which may be required in future. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 49 ++++++++++++++++++++++++++++++++++---------- translate-all.c | 2 ++ 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index fd7a3e0425..cee13e0c21 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { - TCGContext s; + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; - s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr; - tcg_out_b(&s, 0, (tcg_insn_unit *)addr); - flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s)); + if (in_range_b(diff)) { + i1 = B | (diff & 0x3fffffc); + i2 = NOP; + } else if (USE_REG_RA) { + intptr_t lo, hi; + diff = addr - (uintptr_t)tb_ret_addr; + lo = (int16_t)diff; + hi = (int32_t)(diff - lo); + assert(diff == hi + lo); + i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16); + i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo); + } else { + assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr); + i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16); + i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr); + } +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + + /* ??? __atomic_store_8, presuming there's some way to do that + for 32-bit, otherwise this is good enough for 64-bit. */ + *(uint64_t *)jmp_addr = pair; + flush_icache_range(jmp_addr, jmp_addr + 8); } static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) @@ -1869,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out_b(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* Direct jump method. */ - s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); - s->code_ptr += 7; - } else { - /* Indirect jump method. */ - tcg_abort(); + tcg_debug_assert(s->tb_jmp_offset); + /* Direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); } + s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); + /* To be replaced by either a branch+nop or a load into TMP1. */ + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; case INDEX_op_br: diff --git a/translate-all.c b/translate-all.c index 333eba4f5d..20ce40ec28 100644 --- a/translate-all.c +++ b/translate-all.c @@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index) # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) #elif defined(__sparc__) # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) +#elif defined(__powerpc64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) #elif defined(__aarch64__) # define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) #elif defined(__arm__) From 1e1df962e325e18a5188c4814cd1a10215a48f79 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 2 Oct 2015 22:41:01 +0000 Subject: [PATCH 03/10] tcg/ppc: Prefer mask over andi. Prefer the instruction that isn't required to modify cr0. Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index cee13e0c21..2c72565fb9 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -700,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) { int mb, me; - if ((c & 0xffff) == c) { + if (mask_operand(c, &mb, &me)) { + tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); + } else if ((c & 0xffff) == c) { tcg_out32(s, ANDI | SAI(src, dst, c)); return; } else if ((c & 0xffff0000) == c) { tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); return; - } else if (mask_operand(c, &mb, &me)) { - tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); } else { tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); @@ -719,18 +719,18 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) int mb, me; assert(TCG_TARGET_REG_BITS == 64); - if ((c & 0xffff) == c) { - tcg_out32(s, ANDI | SAI(src, dst, c)); - return; - } else if ((c & 0xffff0000) == c) { - tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); - return; - } else if (mask64_operand(c, &mb, &me)) { + if (mask64_operand(c, &mb, &me)) { if (mb == 0) { tcg_out_rld(s, RLDICR, dst, src, 0, me); } else { tcg_out_rld(s, RLDICL, dst, src, 0, mb); } + } else if ((c & 0xffff) == c) { + tcg_out32(s, ANDI | SAI(src, dst, c)); + return; + } else if ((c & 0xffff0000) == c) { + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); + return; } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); From c0e40dbdcc291c85faa289a53be60b7b1b7c7598 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:12 +0100 Subject: [PATCH 04/10] tcg-opc.h: Simplify insn_start def We already have a TLADDR_ARGS definition, so rearrange the order slightly and use it in the definition of insn_start, instead of having an #ifdef. Reviewed-by: Aurelien Jarno Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-2-git-send-email-james.hogan@imgtec.com> --- tcg/tcg-opc.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index c6f95703eb..6d0410c4b9 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -173,18 +173,15 @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) +#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) + /* QEMU specific */ -#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -DEF(insn_start, 0, 0, 2 * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -#else -DEF(insn_start, 0, 0, TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) -#endif +DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, + TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) -#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) -#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) - DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1, From d76f36535099b5d3d880c5e3ac1d411420c8a086 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:13 +0100 Subject: [PATCH 05/10] disas/mips: Add R6 jr/jr.hb to disassembler MIPS r6 encodes jr as jalr zero, and jr.hb as jalr.hb zero, so add these encodings to the MIPS disassembly table. Reviewed-by: Aurelien Jarno Reviewed-by: Leon Alrae Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-3-git-send-email-james.hogan@imgtec.com> --- disas/mips.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/disas/mips.c b/disas/mips.c index 01336a8385..bf0bbaf86a 100644 --- a/disas/mips.c +++ b/disas/mips.c @@ -2420,9 +2420,11 @@ const struct mips_opcode mips_builtin_opcodes[] = {"hibernate","", 0x42000023, 0xffffffff, 0, 0, V1 }, {"ins", "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s, 0, I33 }, {"jr", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, +{"jr", "s", 0x00000009, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr */ /* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with the same hazard barrier effect. */ {"jr.hb", "s", 0x00000408, 0xfc1fffff, UBD|RD_s, 0, I32 }, +{"jr.hb", "s", 0x00000409, 0xfc1fffff, UBD|RD_s, 0, I32R6 }, /* jalr.hb */ {"j", "s", 0x00000008, 0xfc1fffff, UBD|RD_s, 0, I1 }, /* jr */ /* SVR4 PIC code requires special handling for j, so it must be a macro. */ From ce14bd4d469f3a14f6cbfceb6360aee066a60d72 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:14 +0100 Subject: [PATCH 06/10] tcg/mips: Add use_mips32r6_instructions definition Add definition use_mips32r6_instructions to the MIPS TCG backend which is constant 1 when built for MIPS release 6. This will be used to decide between pre-R6 and R6 instruction encodings. Reviewed-by: Aurelien Jarno Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-4-git-send-email-james.hogan@imgtec.com> --- tcg/mips/tcg-target.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index f5ba52cacf..e579c10b9a 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -96,6 +96,13 @@ extern bool use_mips32_instructions; extern bool use_mips32r2_instructions; #endif +/* MIPS32R6 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6) +#define use_mips32r6_instructions 1 +#else +#define use_mips32r6_instructions 0 +#endif + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 From 6e0d096989be52c2b945fc83a9bd15d887bbdb47 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:15 +0100 Subject: [PATCH 07/10] tcg/mips: Support r6 JR encoding MIPSr6 encodes JR as JALR with zero as the link register, and the pre-r6 JR encoding is removed. Update TCG to use the new encoding when built for r6. We still use the old encoding for pre-r6, so as not to confuse return prediction stack hardware which may detect only particular encodings of the return instruction. Reviewed-by: Aurelien Jarno Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-5-git-send-email-james.hogan@imgtec.com> --- tcg/mips/tcg-target.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 4305af9673..c08418c413 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -288,7 +288,7 @@ typedef enum { OPC_SRLV = OPC_SPECIAL | 0x06, OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06, OPC_SRAV = OPC_SPECIAL | 0x07, - OPC_JR = OPC_SPECIAL | 0x08, + OPC_JR_R5 = OPC_SPECIAL | 0x08, OPC_JALR = OPC_SPECIAL | 0x09, OPC_MOVZ = OPC_SPECIAL | 0x0A, OPC_MOVN = OPC_SPECIAL | 0x0B, @@ -320,6 +320,9 @@ typedef enum { OPC_WSBH = OPC_SPECIAL3 | 0x0a0, OPC_SEB = OPC_SPECIAL3 | 0x420, OPC_SEH = OPC_SPECIAL3 | 0x620, + + /* MIPS r6 doesn't have JR, JALR should be used instead */ + OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, } MIPSInsn; /* From bc6d0c22b09a72897d9db4482076f89e7de97400 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:16 +0100 Subject: [PATCH 08/10] tcg/mips: Support r6 multiply/divide encodings MIPSr6 adds several new integer multiply, divide, and modulo instructions, and removes several pre-r6 encodings, along with the HI/LO registers which were the implicit operands of some of those instructions. Update TCG to use the new instructions when built for r6. The new instructions actually map much more directly to the TCG ops, as they only provide a single 32-bit half of the result and in a normal general purpose register instead of HI or LO. The mulu2_i32 and muls2_i32 operations are no longer appropriate for r6, so they are removed from the TCG opcode table. This is because they would need to emit two separate host instructions anyway (for the high and low half of the result), which TCG can arrange automatically for us in the absense of mulu2_i32/muls2_i32 by splitting it into mul_i32 and mul*h_i32 TCG ops. Reviewed-by: Aurelien Jarno Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-6-git-send-email-james.hogan@imgtec.com> --- tcg/mips/tcg-target.c | 42 +++++++++++++++++++++++++++++++++++++++++- tcg/mips/tcg-target.h | 4 ++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index c08418c413..a937b1475d 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -295,9 +295,17 @@ typedef enum { OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, + OPC_MUL_R6 = OPC_SPECIAL | (0x02 << 6) | 0x18, + OPC_MUH = OPC_SPECIAL | (0x03 << 6) | 0x18, OPC_MULTU = OPC_SPECIAL | 0x19, + OPC_MULU = OPC_SPECIAL | (0x02 << 6) | 0x19, + OPC_MUHU = OPC_SPECIAL | (0x03 << 6) | 0x19, OPC_DIV = OPC_SPECIAL | 0x1A, + OPC_DIV_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1A, + OPC_MOD = OPC_SPECIAL | (0x03 << 6) | 0x1A, OPC_DIVU = OPC_SPECIAL | 0x1B, + OPC_DIVU_R6 = OPC_SPECIAL | (0x02 << 6) | 0x1B, + OPC_MODU = OPC_SPECIAL | (0x03 << 6) | 0x1B, OPC_ADDU = OPC_SPECIAL | 0x21, OPC_SUBU = OPC_SPECIAL | 0x23, OPC_AND = OPC_SPECIAL | 0x24, @@ -312,7 +320,7 @@ typedef enum { OPC_BGEZ = OPC_REGIMM | (0x01 << 16), OPC_SPECIAL2 = 0x1c << 26, - OPC_MUL = OPC_SPECIAL2 | 0x002, + OPC_MUL_R5 = OPC_SPECIAL2 | 0x002, OPC_SPECIAL3 = 0x1f << 26, OPC_EXT = OPC_SPECIAL3 | 0x000, @@ -323,6 +331,12 @@ typedef enum { /* MIPS r6 doesn't have JR, JALR should be used instead */ OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5, + + /* + * MIPS r6 replaces MUL with an alternative encoding which is + * backwards-compatible at the assembly level. + */ + OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, } MIPSInsn; /* @@ -1448,21 +1462,45 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, i1 = OPC_MULT, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_mulsh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); + break; + } i1 = OPC_MULT, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_muluh_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); + break; + } i1 = OPC_MULTU, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_div_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_divu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFLO; goto do_hilo1; case INDEX_op_rem_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + break; + } i1 = OPC_DIV, i2 = OPC_MFHI; goto do_hilo1; case INDEX_op_remu_i32: + if (use_mips32r6_instructions) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + break; + } i1 = OPC_DIVU, i2 = OPC_MFHI; do_hilo1: tcg_out_opc_reg(s, i1, 0, a1, a2); @@ -1595,8 +1633,10 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +#if !use_mips32r6_instructions { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +#endif { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index e579c10b9a..b1cda37b66 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -112,8 +112,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_orc_i32 0 #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) +#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 From 137d63902faf4960081856db9242cbaf234a23af Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 2 Oct 2015 13:24:17 +0100 Subject: [PATCH 09/10] tcg/mips: Support r6 SEL{NE, EQ}Z instead of MOVN/MOVZ Extend MIPS movcond implementation to support the SELNEZ/SELEQZ instructions introduced in MIPS r6 (where MOVN/MOVZ have been removed). Whereas the "MOVN/MOVZ rd, rs, rt" instructions have the following semantics: rd = [!]rt ? rs : rd The "SELNEZ/SELEQZ rd, rs, rt" instructions are slightly different: rd = [!]rt ? rs : 0 First we ensure that if one of the movcond input values is zero that it comes last (we can swap the input arguments if we invert the condition). This is so that it can exactly match one of the SELNEZ/SELEQZ instructions and avoid the need to emit the other one. Otherwise we emit the opposite instruction first into a temporary register, and OR that into the result: SELNEZ/SELEQZ TMP1, v2, c1 SELEQZ/SELNEZ ret, v1, c1 OR ret, ret, TMP1 Which does the following: ret = cond ? v1 : v2 Reviewed-by: Aurelien Jarno Signed-off-by: James Hogan Signed-off-by: Richard Henderson Message-Id: <1443788657-14537-7-git-send-email-james.hogan@imgtec.com> --- tcg/mips/tcg-target.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index a937b1475d..79e052ff7a 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -314,6 +314,8 @@ typedef enum { OPC_NOR = OPC_SPECIAL | 0x27, OPC_SLT = OPC_SPECIAL | 0x2A, OPC_SLTU = OPC_SPECIAL | 0x2B, + OPC_SELEQZ = OPC_SPECIAL | 0x35, + OPC_SELNEZ = OPC_SPECIAL | 0x37, OPC_REGIMM = 0x01 << 26, OPC_BLTZ = OPC_REGIMM | (0x00 << 16), @@ -858,13 +860,20 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, } static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v) + TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) { - MIPSInsn m_opc = OPC_MOVN; + bool eqz = false; + + /* If one of the values is zero, put it last to match SEL*Z instructions */ + if (use_mips32r6_instructions && v1 == 0) { + v1 = v2; + v2 = 0; + cond = tcg_invert_cond(cond); + } switch (cond) { case TCG_COND_EQ: - m_opc = OPC_MOVZ; + eqz = true; /* FALLTHRU */ case TCG_COND_NE: if (c2 != 0) { @@ -877,14 +886,32 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, /* Minimize code size by preferring a compare not requiring INV. */ if (mips_cmp_map[cond] & MIPS_CMP_INV) { cond = tcg_invert_cond(cond); - m_opc = OPC_MOVZ; + eqz = true; } tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); c1 = TCG_TMP0; break; } - tcg_out_opc_reg(s, m_opc, ret, v, c1); + if (use_mips32r6_instructions) { + MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ; + MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ; + + if (v2 != 0) { + tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1); + } + tcg_out_opc_reg(s, m_opc_t, ret, v1, c1); + if (v2 != 0) { + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1); + } + } else { + MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN; + + tcg_out_opc_reg(s, m_opc, ret, v1, c1); + + /* This should be guaranteed via constraints */ + tcg_debug_assert(v2 == ret); + } } static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) @@ -1577,7 +1604,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_movcond_i32: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); + tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); break; case INDEX_op_setcond_i32: @@ -1666,7 +1693,11 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +#if use_mips32r6_instructions + { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, +#else { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +#endif { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, From 89a82cd4b6a90fe117fa715e2abe51d5c607560c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Sep 2015 15:33:53 -0700 Subject: [PATCH 10/10] cpu-exec: Add "nochain" debug flag Respect it to avoid linking TBs together. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- cpu-exec.c | 3 ++- include/qemu/log.h | 1 + qemu-log.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index 8fd56a69e0..7eef0830fe 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -477,7 +477,8 @@ int cpu_exec(CPUState *cpu) /* see if we can patch the calling TB. When the TB spans two pages, we cannot safely do a direct jump. */ - if (next_tb != 0 && tb->page_addr[1] == -1) { + if (next_tb != 0 && tb->page_addr[1] == -1 + && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK, tb); } diff --git a/include/qemu/log.h b/include/qemu/log.h index f880e66dbc..7de45001f2 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) #define LOG_UNIMP (1 << 10) #define LOG_GUEST_ERROR (1 << 11) #define CPU_LOG_MMU (1 << 12) +#define CPU_LOG_TB_NOCHAIN (1 << 13) /* Returns true if a bit is set in the current loglevel mask */ diff --git a/qemu-log.c b/qemu-log.c index 13f3813f68..efd07c81ea 100644 --- a/qemu-log.c +++ b/qemu-log.c @@ -119,6 +119,9 @@ const QEMULogItem qemu_log_items[] = { { LOG_GUEST_ERROR, "guest_errors", "log when the guest OS does something invalid (eg accessing a\n" "non-existent register)" }, + { CPU_LOG_TB_NOCHAIN, "nochain", + "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n" + "complete traces" }, { 0, NULL, NULL }, };