author     David 'Digit' Turner <digit@android.com>    2011-05-11 18:19:41 +0200
committer  David 'Digit' Turner <digit@android.com>    2011-06-01 17:08:19 +0200
commit     f1d9bf153726533acf659efd796aa484dfd0b412
tree       a3bcdccf0b81e1bc6e4ac6a21fb6b4777d4f0383
parent     280afa072a7b829e581d884c2b3276530a6014b7
tcg: integrate upstream version
Change-Id: Ifcdebc2e7179fbc64b46a9150e6dae62f86eba3c
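
Note on the def-helper.h change below: the old one-bit-per-argument sizemask is
replaced by a two-bits-per-slot encoding, where slot 0 describes the return
value, bit 2n records whether slot n is 64 bits wide, and bit 2n+1 records
whether it is signed. A minimal standalone C sketch of that bit layout (the
sizemask_bits helper is hypothetical, written here only to illustrate what
dh_sizemask computes; it is not part of the patch):

    #include <stdio.h>

    /* Two bits per slot n: bit 2n = "is 64-bit", bit 2n+1 = "is signed". */
    static int sizemask_bits(int is_64bit, int is_signed, int n)
    {
        return (is_64bit << (n * 2)) | (is_signed << (n * 2 + 1));
    }

    int main(void)
    {
        /* A helper returning s32 and taking one i64 argument: */
        int sizemask = 0;
        sizemask |= sizemask_bits(0, 1, 0);  /* slot 0: 32-bit, signed return */
        sizemask |= sizemask_bits(1, 0, 1);  /* slot 1: 64-bit, unsigned arg  */
        printf("sizemask = 0x%x\n", sizemask);  /* prints sizemask = 0x6 */
        return 0;
    }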
-rw-r--r--   Makefile.target                 |    5
-rw-r--r--   def-helper.h                    |   38
-rw-r--r--   exec.c                          |    5
-rw-r--r--   target-i386/ops_sse_header.h    |    3
-rw-r--r--   tcg/README                      |   85
-rw-r--r--   tcg/TODO                        |    2
-rw-r--r--   tcg/arm/tcg-target.c            | 1072
-rw-r--r--   tcg/arm/tcg-target.h            |   38
-rw-r--r--   tcg/hppa/tcg-target.c           | 1839
-rw-r--r--   tcg/hppa/tcg-target.h           |  148
-rw-r--r--   tcg/i386/tcg-target.c           | 2002
-rw-r--r--   tcg/i386/tcg-target.h           |   61
-rw-r--r--   tcg/ppc/tcg-target.c            |  184
-rw-r--r--   tcg/ppc/tcg-target.h            |   11
-rw-r--r--   tcg/ppc64/tcg-target.c          |   44
-rw-r--r--   tcg/ppc64/tcg-target.h          |    9
-rw-r--r--   tcg/sparc/tcg-target.c          |   38
-rw-r--r--   tcg/sparc/tcg-target.h          |   18
-rw-r--r--   tcg/tcg-op.h                    |  415
-rw-r--r--   tcg/tcg-opc.h                   |  318
-rw-r--r--   tcg/tcg-runtime.h               |    5
-rw-r--r--   tcg/tcg.c                       |  156
-rw-r--r--   tcg/tcg.h                       |   62
-rw-r--r--   translate-all.c                 |    4
24 files changed, 4356 insertions, 2206 deletions
diff --git a/Makefile.target b/Makefile.target index 75c79fe..b8eb26c 100644 --- a/Makefile.target +++ b/Makefile.target @@ -29,7 +29,10 @@ EMULATOR_TARGET_CFLAGS := \ -DNEED_CPU_H \ TCG_TARGET := $(HOST_ARCH) -ifeq ($(TCG_TARGET),x86) +ifeq ($(HOST_ARCH),x86) + TCG_TARGET := i386 +endif +ifeq ($(HOST_ARCH),x86_64) TCG_TARGET := i386 endif diff --git a/def-helper.h b/def-helper.h index 8a88c5b..8a822c7 100644 --- a/def-helper.h +++ b/def-helper.h @@ -81,9 +81,29 @@ #define dh_is_64bit_ptr (TCG_TARGET_REG_BITS == 64) #define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) +#define dh_is_signed_void 0 +#define dh_is_signed_i32 0 +#define dh_is_signed_s32 1 +#define dh_is_signed_i64 0 +#define dh_is_signed_s64 1 +#define dh_is_signed_f32 0 +#define dh_is_signed_f64 0 +#define dh_is_signed_tl 0 +#define dh_is_signed_int 1 +/* ??? This is highly specific to the host cpu. There are even special + extension instructions that may be required, e.g. ia64's addp4. But + for now we don't support any 64-bit targets with 32-bit pointers. */ +#define dh_is_signed_ptr 0 +#define dh_is_signed_env dh_is_signed_ptr +#define dh_is_signed(t) dh_is_signed_##t + +#define dh_sizemask(t, n) \ + sizemask |= dh_is_64bit(t) << (n*2); \ + sizemask |= dh_is_signed(t) << (n*2+1) + #define dh_arg(t, n) \ args[n - 1] = glue(GET_TCGV_, dh_alias(t))(glue(arg, n)); \ - sizemask |= dh_is_64bit(t) << n + dh_sizemask(t, n) #define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) @@ -138,8 +158,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1)) \ { \ TCGArg args[1]; \ - int sizemask; \ - sizemask = dh_is_64bit(ret); \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ dh_arg(t1, 1); \ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 1, args); \ } @@ -149,8 +169,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1 dh_arg_decl(t2, 2)) \ { \ TCGArg args[2]; \ - int sizemask; \ - sizemask = dh_is_64bit(ret); \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ dh_arg(t1, 1); \ dh_arg(t2, 2); \ tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 2, args); \ @@ -161,8 +181,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1 dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \ { \ TCGArg args[3]; \ - int sizemask; \ - sizemask = dh_is_64bit(ret); \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ dh_arg(t1, 1); \ dh_arg(t2, 2); \ dh_arg(t3, 3); \ @@ -174,8 +194,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1 dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \ { \ TCGArg args[4]; \ - int sizemask; \ - sizemask = dh_is_64bit(ret); \ + int sizemask = 0; \ + dh_sizemask(ret, 0); \ dh_arg(t1, 1); \ dh_arg(t2, 2); \ dh_arg(t3, 3); \ @@ -521,6 +521,11 @@ void cpu_exec_init_all(unsigned long tb_size) #if !defined(CONFIG_USER_ONLY) io_mem_init(); #endif +#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) + /* There's no guest base to take into account, so go ahead and + initialize the prologue now. 
*/ + tcg_prologue_init(&tcg_ctx); +#endif } #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY) diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h index 03e9b17..20d47bb 100644 --- a/target-i386/ops_sse_header.h +++ b/target-i386/ops_sse_header.h @@ -31,6 +31,9 @@ #define dh_ctype_Reg Reg * #define dh_ctype_XMMReg XMMReg * #define dh_ctype_MMXReg MMXReg * +#define dh_is_signed_Reg dh_is_signed_ptr +#define dh_is_signed_XMMReg dh_is_signed_ptr +#define dh_is_signed_MMXReg dh_is_signed_ptr DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg) DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg) @@ -75,10 +75,13 @@ destroyed, but local temporaries and globals are preserved. * Helpers: Using the tcg_gen_helper_x_y it is possible to call any function -taking i32, i64 or pointer types. Before calling an helper, all -globals are stored at their canonical location and it is assumed that -the function can modify them. In the future, function modifiers will -be allowed to tell that the helper does not read or write some globals. +taking i32, i64 or pointer types. By default, before calling a helper, +all globals are stored at their canonical location and it is assumed +that the function can modify them. This can be overridden by the +TCG_CALL_CONST function modifier. By default, the helper is allowed to +modify the CPU state or raise an exception. This can be overridden by +the TCG_CALL_PURE function modifier, in which case the call to the +function is removed if the return value is not used. On some TCG targets (e.g. x86), several calling conventions are supported. @@ -210,7 +213,7 @@ t0=t1&~t2 * eqv_i32/i64 t0, t1, t2 -t0=~(t1^t2) +t0=~(t1^t2), or equivalently, t0=t1^~t2 * nand_i32/i64 t0, t1, t2 @@ -265,13 +268,13 @@ ext32u_i64 t0, t1 * bswap16_i32/i64 t0, t1 -16 bit byte swap on a 32/64 bit value. The two/six high order bytes must be -set to zero. +16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order +bytes are set to zero. * bswap32_i32/i64 t0, t1 -32 bit byte swap on a 32/64 bit value. With a 64 bit value, the four high -order bytes must be set to zero. +32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that +the four high order bytes are set to zero. * bswap64_i64 t0, t1 @@ -282,6 +285,28 @@ order bytes must be set to zero. Indicate that the value of t0 won't be used later. It is useful to force dead code elimination. +* deposit_i32/i64 dest, t1, t2, pos, len + +Deposit T2 as a bitfield into T1, placing the result in DEST. +The bitfield is described by POS/LEN, which are immediate values: + + LEN - the length of the bitfield + POS - the position of the first bit, counting from the LSB + +For example, pos=8, len=4 indicates a 4-bit field at bit 8. +This operation would be equivalent to + + dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) + + +********* Conditional moves + +* setcond_i32/i64 cond, dest, t1, t2 + +dest = (t1 cond t2) + +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. + ********* Type conversions * ext_i32_i64 t0, t1 @@ -323,9 +348,37 @@ st32_i64 t0, t1, offset write(t0, t1 + offset) Write 8, 16, 32 or 64 bits to host memory. +********* 64-bit target on 32-bit host support + +The following opcodes are internal to TCG. Thus they are to be implemented by +32-bit host code generators, but are not to be emitted by guest translators. +They are emitted as needed by inline functions within "tcg-op.h". 
+ +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label + +Similar to brcond, except that the 64-bit values T0 and T1 +are formed from two 32-bit arguments. + +* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the 64-bit inputs T1 and T2 are +formed from two 32-bit arguments, and the 64-bit output T0 +is returned in two 32-bit outputs. + +* mulu2_i32 t0_low, t0_high, t1, t2 + +Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding +the full 64-bit product T0. The later is returned in two 32-bit outputs. + +* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high + +Similar to setcond, except that the 64-bit values T1 and T2 are +formed from two 32-bit arguments. The result is a 32-bit value. + ********* QEMU specific operations -* tb_exit t0 +* exit_tb t0 Exit the current TB and return the value t0 (word type). @@ -339,13 +392,17 @@ instructions. qemu_ld8s t0, t1, flags qemu_ld16u t0, t1, flags qemu_ld16s t0, t1, flags +qemu_ld32 t0, t1, flags qemu_ld32u t0, t1, flags qemu_ld32s t0, t1, flags qemu_ld64 t0, t1, flags -Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU -address type. 'flags' contains the QEMU memory index (selects user or -kernel access) for example. +Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address +type. 'flags' contains the QEMU memory index (selects user or kernel access) +for example. + +Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and +"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits. * qemu_st8 t0, t1, flags qemu_st16 t0, t1, flags @@ -445,7 +502,7 @@ register. the speed of the translation. - Don't hesitate to use helpers for complicated or seldom used target - intructions. There is little performance advantage in using TCG to + instructions. There is little performance advantage in using TCG to implement target instructions taking more than about twenty TCG instructions. @@ -1,4 +1,4 @@ -- Add new instructions such as: setcond, clz, ctz, popcnt. +- Add new instructions such as: clz, ctz, popcnt. - See if it is worth exporting mul2, mulu2, div2, divu2. diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index f8d626d..fb858d8 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -22,6 +22,51 @@ * THE SOFTWARE. 
*/ +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7EM__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7R__) +#define USE_ARMV7_INSTRUCTIONS +#endif + +#if defined(USE_ARMV7_INSTRUCTIONS) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) +#define USE_ARMV6_INSTRUCTIONS +#endif + +#if defined(USE_ARMV6_INSTRUCTIONS) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +#define USE_ARMV5_INSTRUCTIONS +#endif + +#ifdef USE_ARMV5_INSTRUCTIONS +static const int use_armv5_instructions = 1; +#else +static const int use_armv5_instructions = 0; +#endif +#undef USE_ARMV5_INSTRUCTIONS + +#ifdef USE_ARMV6_INSTRUCTIONS +static const int use_armv6_instructions = 1; +#else +static const int use_armv6_instructions = 0; +#endif +#undef USE_ARMV6_INSTRUCTIONS + +#ifdef USE_ARMV7_INSTRUCTIONS +static const int use_armv7_instructions = 1; +#else +static const int use_armv7_instructions = 0; +#endif +#undef USE_ARMV7_INSTRUCTIONS + #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", @@ -39,14 +84,11 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r12", "%r13", "%r14", + "%pc", }; #endif static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, @@ -55,8 +97,12 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R9, TCG_REG_R10, TCG_REG_R11, - TCG_REG_R12, TCG_REG_R13, + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R12, TCG_REG_R14, }; @@ -67,12 +113,25 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_R0, TCG_REG_R1 }; +static inline void reloc_abs32(void *code_ptr, tcg_target_long target) +{ + *(uint32_t *) code_ptr = target; +} + +static inline void reloc_pc24(void *code_ptr, tcg_target_long target) +{ + uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2); + + *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff) + | (offset & 0xffffff); +} + static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { switch (type) { case R_ARM_ABS32: - *(uint32_t *) code_ptr = value; + reloc_abs32(code_ptr, value); break; case R_ARM_CALL: @@ -81,8 +140,7 @@ static void patch_reloc(uint8_t *code_ptr, int type, tcg_abort(); case R_ARM_PC24: - *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) | - (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff); + reloc_pc24(code_ptr, value); break; } } @@ -105,69 +163,54 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) break; case 'r': -#ifndef CONFIG_SOFTMMU - case 'd': - case 'D': - case 'x': - case 'X': -#endif ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); break; -#ifdef CONFIG_SOFTMMU - /* qemu_ld/st inputs (unless 'X', 'd' or 'D') */ - case 'x': + /* qemu_ld address */ + case 'l': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU + /* r0 and r1 will be overwritten when reading the tlb entry, + so don't use these. 
*/ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +#endif break; - - /* qemu_ld64 data_reg */ - case 'd': + case 'L': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r1 is still needed to load data_reg2, so don't use it. */ +#ifdef CONFIG_SOFTMMU + /* r1 is still needed to load data_reg or data_reg2, + so don't use it. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +#endif break; - /* qemu_ld/st64 data_reg2 */ - case 'D': + /* qemu_st address & data_reg */ + case 's': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0, r1 and optionally r2 will be overwritten by the address - * and the low word of data, so don't use these. */ + /* r0 and r1 will be overwritten when reading the tlb entry + (softmmu only) and doing the byte swapping, so don't + use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); -# if TARGET_LONG_BITS == 64 - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); -# endif break; - -# if TARGET_LONG_BITS == 64 - /* qemu_ld/st addr_reg2 */ - case 'X': + /* qemu_st64 data_reg2 */ + case 'S': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - /* r0 will be overwritten by the low word of base, so don't use it. */ + /* r0 and r1 will be overwritten when reading the tlb entry + (softmmu only) and doing the byte swapping, so don't + use these. */ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); - break; -# endif +#ifdef CONFIG_SOFTMMU + /* r2 is still needed to load data_reg, so don't use it. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); #endif - - case '1': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - break; - - case '2': - ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); break; default: @@ -309,6 +352,9 @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset) static inline void tcg_out_b_noaddr(TCGContext *s, int cond) { + /* We pay attention here to not modify the branch target by skipping + the corresponding bytes. This ensure that caches and memory are + kept coherent during retranslation. */ #ifdef HOST_WORDS_BIGENDIAN tcg_out8(s, (cond << 4) | 0x0a); s->code_ptr += 3; @@ -324,6 +370,17 @@ static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset) (((offset - 8) >> 2) & 0x00ffffff)); } +static inline void tcg_out_blx(TCGContext *s, int cond, int rn) +{ + tcg_out32(s, (cond << 28) | 0x012fff30 | rn); +} + +static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset) +{ + tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) | + (((offset - 8) >> 2) & 0x00ffffff)); +} + static inline void tcg_out_dat_reg(TCGContext *s, int cond, int opc, int rd, int rn, int rm, int shift) { @@ -358,42 +415,38 @@ static inline void tcg_out_dat_imm(TCGContext *s, } static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, int32_t arg) + int cond, int rd, uint32_t arg) { - int offset = (uint32_t) arg - ((uint32_t) s->code_ptr + 8); - /* TODO: This is very suboptimal, we can easily have a constant * pool somewhere after all the instructions. 
*/ + if ((int)arg < 0 && (int)arg >= -0x100) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); + } else if (use_armv7_instructions) { + /* use movw/movt */ + /* movw */ + tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) + | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); + if (arg & 0xffff0000) { + /* movt */ + tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) + | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); + } + } else { + int opc = ARITH_MOV; + int rn = 0; - if (arg < 0 && arg > -0x100) - return tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - - if (offset < 0x100 && offset > -0x100) - return offset >= 0 ? - tcg_out_dat_imm(s, cond, ARITH_ADD, rd, 15, offset) : - tcg_out_dat_imm(s, cond, ARITH_SUB, rd, 15, -offset); - -#ifdef __ARM_ARCH_7A__ - /* use movw/movt */ - /* movw */ - tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) - | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); - if (arg & 0xffff0000) - /* movt */ - tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) - | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); -#else - tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, arg & 0xff); - if (arg & 0x0000ff00) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 8) & 0xff) | 0xc00); - if (arg & 0x00ff0000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 16) & 0xff) | 0x800); - if (arg & 0xff000000) - tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd, - ((arg >> 24) & 0xff) | 0x400); -#endif + do { + int i, rot; + + i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_ORR; + rn = rd; + } while (arg); + } } static inline void tcg_out_mul32(TCGContext *s, @@ -409,7 +462,7 @@ static inline void tcg_out_mul32(TCGContext *s, tcg_out32(s, (cond << 28) | ( 8 << 16) | (0 << 12) | (rs << 8) | 0x90 | rm); tcg_out_dat_reg(s, cond, ARITH_MOV, - rd, 0, 8, SHIFT_IMM_LSL(0)); + rd, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); } } @@ -447,6 +500,101 @@ static inline void tcg_out_smull32(TCGContext *s, } } +static inline void tcg_out_ext8s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxtb */ + tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(24)); + } +} + +static inline void tcg_out_ext8u(TCGContext *s, int cond, + int rd, int rn) +{ + tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff); +} + +static inline void tcg_out_ext16s(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* sxth */ + tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_ASR(16)); + } +} + +static inline void tcg_out_ext16u(TCGContext *s, int cond, + int rd, int rn) +{ + if (use_armv6_instructions) { + /* uxth */ + tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_LSL(16)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rd, SHIFT_IMM_LSR(16)); + } +} + +static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, 
ARITH_MOV, + TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_ASR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + } +} + +static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) +{ + if (use_armv6_instructions) { + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); + } else { + tcg_out_dat_reg(s, cond, ARITH_EOR, + TCG_REG_R8, rn, rn, SHIFT_IMM_ROR(16)); + tcg_out_dat_imm(s, cond, ARITH_BIC, + TCG_REG_R8, TCG_REG_R8, 0xff | 0x800); + tcg_out_dat_reg(s, cond, ARITH_MOV, + rd, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_reg(s, cond, ARITH_EOR, + rd, rd, TCG_REG_R8, SHIFT_IMM_LSR(8)); + } +} + static inline void tcg_out_ld32_12(TCGContext *s, int cond, int rd, int rn, tcg_target_long im) { @@ -511,7 +659,7 @@ static inline void tcg_out_ld16u_8(TCGContext *s, int cond, (((-im) & 0xf0) << 4) | ((-im) & 0xf)); } -static inline void tcg_out_st16u_8(TCGContext *s, int cond, +static inline void tcg_out_st16_8(TCGContext *s, int cond, int rd, int rn, tcg_target_long im) { if (im >= 0) @@ -531,7 +679,7 @@ static inline void tcg_out_ld16u_r(TCGContext *s, int cond, (rn << 16) | (rd << 12) | rm); } -static inline void tcg_out_st16u_r(TCGContext *s, int cond, +static inline void tcg_out_st16_r(TCGContext *s, int cond, int rd, int rn, int rm) { tcg_out32(s, (cond << 28) | 0x018000b0 | @@ -551,19 +699,6 @@ static inline void tcg_out_ld16s_8(TCGContext *s, int cond, (((-im) & 0xf0) << 4) | ((-im) & 0xf)); } -static inline void tcg_out_st16s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) -{ - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01c000f0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x014000f0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); -} - static inline void tcg_out_ld16s_r(TCGContext *s, int cond, int rd, int rn, int rm) { @@ -571,13 +706,6 @@ static inline void tcg_out_ld16s_r(TCGContext *s, int cond, (rn << 16) | (rd << 12) | rm); } -static inline void tcg_out_st16s_r(TCGContext *s, int cond, - int rd, int rn, int rm) -{ - tcg_out32(s, (cond << 28) | 0x018000f0 | - (rn << 16) | (rd << 12) | rm); -} - static inline void tcg_out_ld8_12(TCGContext *s, int cond, int rd, int rn, tcg_target_long im) { @@ -627,19 +755,6 @@ static inline void tcg_out_ld8s_8(TCGContext *s, int cond, (((-im) & 0xf0) << 4) | ((-im) & 0xf)); } -static inline void tcg_out_st8s_8(TCGContext *s, int cond, - int rd, int rn, tcg_target_long im) -{ - if (im >= 0) - tcg_out32(s, (cond << 28) | 0x01c000d0 | - (rn << 16) | (rd << 12) | - ((im & 0xf0) << 4) | (im & 0xf)); - else - tcg_out32(s, (cond << 28) | 0x014000d0 | - (rn << 16) | (rd << 12) | - (((-im) & 0xf0) << 4) | ((-im) & 0xf)); -} - static inline void tcg_out_ld8s_r(TCGContext *s, int cond, int rd, int rn, int rm) { @@ -647,13 +762,6 @@ static inline void tcg_out_ld8s_r(TCGContext *s, int cond, (rn << 16) | (rd << 12) | rm); } -static inline void tcg_out_st8s_r(TCGContext *s, int cond, - int rd, int rn, int rm) -{ - tcg_out32(s, (cond << 28) | 0x018000d0 | - (rn << 
16) | (rd << 12) | rm); -} - static inline void tcg_out_ld32u(TCGContext *s, int cond, int rd, int rn, int32_t offset) { @@ -694,14 +802,14 @@ static inline void tcg_out_ld16s(TCGContext *s, int cond, tcg_out_ld16s_8(s, cond, rd, rn, offset); } -static inline void tcg_out_st16u(TCGContext *s, int cond, +static inline void tcg_out_st16(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xff || offset < -0xff) { tcg_out_movi32(s, cond, TCG_REG_R8, offset); - tcg_out_st16u_r(s, cond, rd, rn, TCG_REG_R8); + tcg_out_st16_r(s, cond, rd, rn, TCG_REG_R8); } else - tcg_out_st16u_8(s, cond, rd, rn, offset); + tcg_out_st16_8(s, cond, rd, rn, offset); } static inline void tcg_out_ld8u(TCGContext *s, int cond, @@ -724,7 +832,7 @@ static inline void tcg_out_ld8s(TCGContext *s, int cond, tcg_out_ld8s_8(s, cond, rd, rn, offset); } -static inline void tcg_out_st8u(TCGContext *s, int cond, +static inline void tcg_out_st8(TCGContext *s, int cond, int rd, int rn, int32_t offset) { if (offset > 0xfff || offset < -0xfff) { @@ -738,6 +846,11 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) { int32_t val; + if (addr & 1) { + /* goto to a Thumb destination isn't supported */ + tcg_abort(); + } + val = addr - (tcg_target_long) s->code_ptr; if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd) tcg_out_b(s, cond, val); @@ -746,60 +859,60 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr) tcg_abort(); #else if (cond == COND_AL) { - tcg_out_ld32_12(s, COND_AL, 15, 15, -4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */ } else { tcg_out_movi32(s, cond, TCG_REG_R8, val - 8); tcg_out_dat_reg(s, cond, ARITH_ADD, - 15, 15, TCG_REG_R8, SHIFT_IMM_LSL(0)); + TCG_REG_PC, TCG_REG_PC, + TCG_REG_R8, SHIFT_IMM_LSL(0)); } #endif } } -static inline void tcg_out_call(TCGContext *s, int cond, uint32_t addr) +static inline void tcg_out_call(TCGContext *s, uint32_t addr) { int32_t val; -#ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R8, 0, 14, SHIFT_IMM_LSL(0)); -#endif - val = addr - (tcg_target_long) s->code_ptr; - if (val < 0x01fffffd && val > -0x01fffffd) - tcg_out_bl(s, cond, val); - else { + if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) { + if (addr & 1) { + /* Use BLX if the target is in Thumb mode */ + if (!use_armv5_instructions) { + tcg_abort(); + } + tcg_out_blx_imm(s, val); + } else { + tcg_out_bl(s, COND_AL, val); + } + } else { #if 1 tcg_abort(); #else if (cond == COND_AL) { - tcg_out_dat_imm(s, cond, ARITH_ADD, 14, 15, 4); - tcg_out_ld32_12(s, COND_AL, 15, 15, -4); + tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? 
*/ } else { tcg_out_movi32(s, cond, TCG_REG_R9, addr); - tcg_out_dat_imm(s, cond, ARITH_MOV, 14, 0, 15); + tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, + TCG_REG_PC, SHIFT_IMM_LSL(0)); tcg_out_bx(s, cond, TCG_REG_R9); } #endif } - -#ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); -#endif } static inline void tcg_out_callr(TCGContext *s, int cond, int arg) { -#ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R8, 0, 14, SHIFT_IMM_LSL(0)); -#endif - /* TODO: on ARMv5 and ARMv6 replace with tcg_out_blx(s, cond, arg); */ - tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 15, SHIFT_IMM_LSL(0)); - tcg_out_bx(s, cond, arg); -#ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, TCG_REG_R8, SHIFT_IMM_LSL(0)); -#endif + if (use_armv5_instructions) { + tcg_out_blx(s, cond, arg); + } else { + tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, + TCG_REG_PC, SHIFT_IMM_LSL(0)); + tcg_out_bx(s, cond, arg); + } } static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) @@ -809,7 +922,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) if (l->has_value) tcg_out_goto(s, cond, l->u.value); else if (cond == COND_AL) { - tcg_out_ld32_12(s, COND_AL, 15, 15, -4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337); s->code_ptr += 4; } else { @@ -819,57 +932,6 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) } } -static void tcg_out_div_helper(TCGContext *s, int cond, const TCGArg *args, - void *helper_div, void *helper_rem, int shift) -{ - int div_reg = args[0]; - int rem_reg = args[1]; - - /* stmdb sp!, { r0 - r3, ip, lr } */ - /* (Note that we need an even number of registers as per EABI) */ - tcg_out32(s, (cond << 28) | 0x092d500f); - - tcg_out_dat_reg(s, cond, ARITH_MOV, 0, 0, args[2], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 1, 0, args[3], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 2, 0, args[4], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 3, 0, 2, shift); - - tcg_out_call(s, cond, (uint32_t) helper_div); - tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 0, SHIFT_IMM_LSL(0)); - - /* ldmia sp, { r0 - r3, fp, lr } */ - tcg_out32(s, (cond << 28) | 0x089d500f); - - tcg_out_dat_reg(s, cond, ARITH_MOV, 0, 0, args[2], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 1, 0, args[3], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 2, 0, args[4], SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, 3, 0, 2, shift); - - tcg_out_call(s, cond, (uint32_t) helper_rem); - - tcg_out_dat_reg(s, cond, ARITH_MOV, rem_reg, 0, 0, SHIFT_IMM_LSL(0)); - tcg_out_dat_reg(s, cond, ARITH_MOV, div_reg, 0, 8, SHIFT_IMM_LSL(0)); - - /* ldr r0, [sp], #4 */ - if (rem_reg != 0 && div_reg != 0) - tcg_out32(s, (cond << 28) | 0x04bd0004); - /* ldr r1, [sp], #4 */ - if (rem_reg != 1 && div_reg != 1) - tcg_out32(s, (cond << 28) | 0x04bd1004); - /* ldr r2, [sp], #4 */ - if (rem_reg != 2 && div_reg != 2) - tcg_out32(s, (cond << 28) | 0x04bd2004); - /* ldr r3, [sp], #4 */ - if (rem_reg != 3 && div_reg != 3) - tcg_out32(s, (cond << 28) | 0x04bd3004); - /* ldr ip, [sp], #4 */ - if (rem_reg != 12 && div_reg != 12) - tcg_out32(s, (cond << 28) | 0x04bdc004); - /* ldr lr, [sp], #4 */ - if (rem_reg != 14 && div_reg != 14) - tcg_out32(s, (cond << 28) | 0x04bde004); -} - #ifdef CONFIG_SOFTMMU #include "../../softmmu_defs.h" @@ -891,10 +953,9 @@ static void *qemu_st_helpers[4] = { 
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) -static inline void tcg_out_qemu_ld(TCGContext *s, int cond, - const TCGArg *args, int opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2; + int addr_reg, data_reg, data_reg2, bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; # if TARGET_LONG_BITS == 64 @@ -903,6 +964,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, uint32_t *label_ptr; #endif +#ifdef TARGET_WORDS_BIGENDIAN + bswap = 1; +#else + bswap = 0; +#endif data_reg = *args++; if (opc == 3) data_reg2 = *args++; @@ -924,12 +990,12 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, # if CPU_TLB_BITS > 8 # error # endif - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - 8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8, + 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); tcg_out_dat_imm(s, COND_AL, ARITH_AND, - 0, 8, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, - 0, TCG_AREG0, 0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, + TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_read))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and @@ -938,13 +1004,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, * before. */ if (mem_index) - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 0, 0, + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); - tcg_out_ld32_12(s, COND_AL, 1, 0, + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, offsetof(CPUState, tlb_table[0][0].addr_read)); - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, - 0, 1, 8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, + TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ if (s_bits) tcg_out_dat_imm(s, COND_EQ, ARITH_TST, @@ -952,95 +1018,101 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, # if TARGET_LONG_BITS == 64 /* XXX: possibly we could use a block data load or writeback in * the first access. 
*/ - tcg_out_ld32_12(s, COND_EQ, 1, 0, + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, offsetof(CPUState, tlb_table[0][0].addr_read) + 4); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, - 0, 1, addr_reg2, SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif - tcg_out_ld32_12(s, COND_EQ, 1, 0, + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, offsetof(CPUState, tlb_table[0][0].addend)); switch (opc) { case 0: - tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, 1); + tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); break; case 0 | 4: - tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, 1); + tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); break; case 1: - tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, 1); + tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + if (bswap) { + tcg_out_bswap16(s, COND_EQ, data_reg, data_reg); + } break; case 1 | 4: - tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, 1); + if (bswap) { + tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg); + } else { + tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + } break; case 2: default: - tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, 1); + tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + if (bswap) { + tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + } break; case 3: - tcg_out_ld32_rwb(s, COND_EQ, data_reg, 1, addr_reg); - tcg_out_ld32_12(s, COND_EQ, data_reg2, 1, 4); + if (bswap) { + tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); + tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2); + tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + } else { + tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + } break; } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); - -# ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 14, SHIFT_IMM_LSL(0)); -# endif + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ - if (addr_reg) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 0, 0, addr_reg, SHIFT_IMM_LSL(0)); + if (addr_reg != TCG_REG_R0) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0)); + } # if TARGET_LONG_BITS == 32 - tcg_out_dat_imm(s, cond, ARITH_MOV, 1, 0, mem_index); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index); # else - if (addr_reg2 != 1) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, addr_reg2, SHIFT_IMM_LSL(0)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0)); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); # endif - tcg_out_bl(s, cond, (tcg_target_long) qemu_ld_helpers[s_bits] - - (tcg_target_long) s->code_ptr); + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); switch (opc) { case 0 | 4: - tcg_out_dat_reg(s, cond, ARITH_MOV, - 0, 0, 0, SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - data_reg, 0, 0, SHIFT_IMM_ASR(24)); + tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); break; case 1 | 4: - tcg_out_dat_reg(s, cond, ARITH_MOV, - 0, 0, 0, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - data_reg, 0, 0, SHIFT_IMM_ASR(16)); + tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); break; case 0: case 1: case 2: default: - if (data_reg) - tcg_out_dat_reg(s, cond, 
ARITH_MOV, - data_reg, 0, 0, SHIFT_IMM_LSL(0)); + if (data_reg != TCG_REG_R0) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0)); + } break; case 3: - if (data_reg != 0) - tcg_out_dat_reg(s, cond, ARITH_MOV, - data_reg, 0, 0, SHIFT_IMM_LSL(0)); - if (data_reg2 != 1) - tcg_out_dat_reg(s, cond, ARITH_MOV, - data_reg2, 0, 1, SHIFT_IMM_LSL(0)); + if (data_reg != TCG_REG_R0) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0)); + } + if (data_reg2 != TCG_REG_R1) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0)); + } break; } -# ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 8, SHIFT_IMM_LSL(0)); -# endif - - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1051,9 +1123,9 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, i = ctz32(offset) & ~1; rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 8, addr_reg, + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg, ((offset >> i) & 0xff) | rot); - addr_reg = 8; + addr_reg = TCG_REG_R8; offset &= ~(0xff << i); } } @@ -1066,33 +1138,47 @@ static inline void tcg_out_qemu_ld(TCGContext *s, int cond, break; case 1: tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0); + if (bswap) { + tcg_out_bswap16(s, COND_AL, data_reg, data_reg); + } break; case 1 | 4: - tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0); + if (bswap) { + tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0); + tcg_out_bswap16s(s, COND_AL, data_reg, data_reg); + } else { + tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0); + } break; case 2: default: tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0); + if (bswap) { + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); + } break; case 3: /* TODO: use block load - * check that data_reg2 > data_reg or the other way */ if (data_reg == addr_reg) { - tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, 4); - tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0); + tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4); + tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0); } else { - tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0); - tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, 4); + tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0); + tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 
0 : 4); + } + if (bswap) { + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); + tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2); } break; } #endif } -static inline void tcg_out_qemu_st(TCGContext *s, int cond, - const TCGArg *args, int opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2; + int addr_reg, data_reg, data_reg2, bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; # if TARGET_LONG_BITS == 64 @@ -1101,6 +1187,11 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, uint32_t *label_ptr; #endif +#ifdef TARGET_WORDS_BIGENDIAN + bswap = 1; +#else + bswap = 0; +#endif data_reg = *args++; if (opc == 3) data_reg2 = *args++; @@ -1120,11 +1211,11 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, * add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS */ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - 8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + TCG_REG_R8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); tcg_out_dat_imm(s, COND_AL, ARITH_AND, - 0, 8, CPU_TLB_SIZE - 1); - tcg_out_dat_reg(s, COND_AL, ARITH_ADD, - 0, TCG_AREG0, 0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1); + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, + TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); /* In the * ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_write))] * below, the offset is likely to exceed 12 bits if mem_index != 0 and @@ -1133,13 +1224,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, * before. */ if (mem_index) - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 0, 0, + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, (mem_index << (TLB_SHIFT & 1)) | ((16 - (TLB_SHIFT >> 1)) << 8)); - tcg_out_ld32_12(s, COND_AL, 1, 0, + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, offsetof(CPUState, tlb_table[0][0].addr_write)); - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, - 0, 1, 8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, + TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); /* Check alignment. */ if (s_bits) tcg_out_dat_imm(s, COND_EQ, ARITH_TST, @@ -1147,125 +1238,121 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, # if TARGET_LONG_BITS == 64 /* XXX: possibly we could use a block data load or writeback in * the first access. 
*/ - tcg_out_ld32_12(s, COND_EQ, 1, 0, - offsetof(CPUState, tlb_table[0][0].addr_write) - + 4); - tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, - 0, 1, addr_reg2, SHIFT_IMM_LSL(0)); + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, + offsetof(CPUState, tlb_table[0][0].addr_write) + 4); + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); # endif - tcg_out_ld32_12(s, COND_EQ, 1, 0, + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, offsetof(CPUState, tlb_table[0][0].addend)); switch (opc) { case 0: - tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, 1); - break; - case 0 | 4: - tcg_out_st8s_r(s, COND_EQ, data_reg, addr_reg, 1); + tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); break; case 1: - tcg_out_st16u_r(s, COND_EQ, data_reg, addr_reg, 1); - break; - case 1 | 4: - tcg_out_st16s_r(s, COND_EQ, data_reg, addr_reg, 1); + if (bswap) { + tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg); + tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + } else { + tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + } break; case 2: default: - tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, 1); + if (bswap) { + tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); + tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + } else { + tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + } break; case 3: - tcg_out_st32_rwb(s, COND_EQ, data_reg, 1, addr_reg); - tcg_out_st32_12(s, COND_EQ, data_reg2, 1, 4); + if (bswap) { + tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2); + tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg); + tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); + tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4); + } else { + tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); + tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + } break; } label_ptr = (void *) s->code_ptr; - tcg_out_b(s, COND_EQ, 8); + tcg_out_b_noaddr(s, COND_EQ); /* TODO: move this code to where the constants pool will be */ - if (addr_reg) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 0, 0, addr_reg, SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0)); # if TARGET_LONG_BITS == 32 switch (opc) { case 0: - tcg_out_dat_imm(s, cond, ARITH_AND, 1, data_reg, 0xff); - tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index); + tcg_out_ext8u(s, COND_AL, TCG_REG_R1, data_reg); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); break; case 1: - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, data_reg, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, 1, SHIFT_IMM_LSR(16)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index); + tcg_out_ext16u(s, COND_AL, TCG_REG_R1, data_reg); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); break; case 2: - if (data_reg != 1) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, data_reg, SHIFT_IMM_LSL(0)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R1, 0, data_reg, SHIFT_IMM_LSL(0)); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); break; case 3: - if (data_reg != 1) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, data_reg, SHIFT_IMM_LSL(0)); - if (data_reg2 != 2) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 2, 0, data_reg2, SHIFT_IMM_LSL(0)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R8, 0, mem_index); + tcg_out32(s, (COND_AL << 28) | 0x052d8010); /* str r8, [sp, 
#-0x10]! */ + if (data_reg != TCG_REG_R2) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0)); + } + if (data_reg2 != TCG_REG_R3) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R3, 0, data_reg2, SHIFT_IMM_LSL(0)); + } break; } # else - if (addr_reg2 != 1) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 1, 0, addr_reg2, SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0)); switch (opc) { case 0: - tcg_out_dat_imm(s, cond, ARITH_AND, 2, data_reg, 0xff); - tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index); + tcg_out_ext8u(s, COND_AL, TCG_REG_R2, data_reg); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index); break; case 1: - tcg_out_dat_reg(s, cond, ARITH_MOV, - 2, 0, data_reg, SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, cond, ARITH_MOV, - 2, 0, 2, SHIFT_IMM_LSR(16)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index); + tcg_out_ext16u(s, COND_AL, TCG_REG_R2, data_reg); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index); break; case 2: - if (data_reg != 2) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 2, 0, data_reg, SHIFT_IMM_LSL(0)); - tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index); + if (data_reg != TCG_REG_R2) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0)); + } + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index); break; case 3: - tcg_out_dat_imm(s, cond, ARITH_MOV, 8, 0, mem_index); - tcg_out32(s, (cond << 28) | 0x052d8010); /* str r8, [sp, #-0x10]! */ - if (data_reg != 2) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 2, 0, data_reg, SHIFT_IMM_LSL(0)); - if (data_reg2 != 3) - tcg_out_dat_reg(s, cond, ARITH_MOV, - 3, 0, data_reg2, SHIFT_IMM_LSL(0)); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R8, 0, mem_index); + tcg_out32(s, (COND_AL << 28) | 0x052d8010); /* str r8, [sp, #-0x10]! 
*/ + if (data_reg != TCG_REG_R2) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0)); + } + if (data_reg2 != TCG_REG_R3) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, + TCG_REG_R3, 0, data_reg2, SHIFT_IMM_LSL(0)); + } break; } # endif -# ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 14, SHIFT_IMM_LSL(0)); -# endif - - tcg_out_bl(s, cond, (tcg_target_long) qemu_st_helpers[s_bits] - - (tcg_target_long) s->code_ptr); -# if TARGET_LONG_BITS == 64 + tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]); if (opc == 3) - tcg_out_dat_imm(s, cond, ARITH_ADD, 13, 13, 0x10); -# endif + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10); -# ifdef SAVE_LR - tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 8, SHIFT_IMM_LSL(0)); -# endif - - *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2; + reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1276,9 +1363,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, i = ctz32(offset) & ~1; rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 8, addr_reg, + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg, ((offset >> i) & 0xff) | rot); - addr_reg = 8; + addr_reg = TCG_REG_R1; offset &= ~(0xff << i); } } @@ -1286,24 +1373,35 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, case 0: tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0); break; - case 0 | 4: - tcg_out_st8s_8(s, COND_AL, data_reg, addr_reg, 0); - break; case 1: - tcg_out_st16u_8(s, COND_AL, data_reg, addr_reg, 0); - break; - case 1 | 4: - tcg_out_st16s_8(s, COND_AL, data_reg, addr_reg, 0); + if (bswap) { + tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0); + } else { + tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0); + } break; case 2: default: - tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0); + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0); + } else { + tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0); + } break; case 3: /* TODO: use block store - * check that data_reg2 > data_reg or the other way */ - tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0); - tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4); + if (bswap) { + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4); + } else { + tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0); + tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4); + } break; } #endif @@ -1311,44 +1409,34 @@ static inline void tcg_out_qemu_st(TCGContext *s, int cond, static uint8_t *tb_ret_addr; -static inline void tcg_out_op(TCGContext *s, int opc, +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { int c; switch (opc) { case INDEX_op_exit_tb: -#ifdef SAVE_LR - if (args[0] >> 8) - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, 15, 0); - else - tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 15, 0, 14, SHIFT_IMM_LSL(0)); - if (args[0] >> 8) - tcg_out32(s, args[0]); -#else { uint8_t *ld_ptr = s->code_ptr; if (args[0] >> 8) - tcg_out_ld32_12(s, COND_AL, 0, 15, 0); + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); else - tcg_out_dat_imm(s, COND_AL, 
ARITH_MOV, 0, 0, args[0]); + tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]); tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); if (args[0] >> 8) { *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; tcg_out32(s, args[0]); } } -#endif break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* Direct jump method */ #if defined(USE_DIRECT_JUMP) s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - tcg_out_b(s, COND_AL, 8); + tcg_out_b_noaddr(s, COND_AL); #else - tcg_out_ld32_12(s, COND_AL, 15, 15, -4); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; tcg_out32(s, 0); #endif @@ -1359,12 +1447,12 @@ static inline void tcg_out_op(TCGContext *s, int opc, if (c > 0xfff || c < -0xfff) { tcg_out_movi32(s, COND_AL, TCG_REG_R0, (tcg_target_long) (s->tb_next + args[0])); - tcg_out_ld32_12(s, COND_AL, 15, TCG_REG_R0, 0); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0); } else - tcg_out_ld32_12(s, COND_AL, 15, 15, c); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c); #else - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, 15, 0); - tcg_out_ld32_12(s, COND_AL, 15, TCG_REG_R0, 0); + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0); tcg_out32(s, (tcg_target_long) (s->tb_next + args[0])); #endif } @@ -1372,7 +1460,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, break; case INDEX_op_call: if (const_args[0]) - tcg_out_call(s, COND_AL, args[0]); + tcg_out_call(s, args[0]); else tcg_out_callr(s, COND_AL, args[0]); break; @@ -1402,10 +1490,10 @@ static inline void tcg_out_op(TCGContext *s, int opc, tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); break; case INDEX_op_st8_i32: - tcg_out_st8u(s, COND_AL, args[0], args[1], args[2]); + tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); break; case INDEX_op_st16_i32: - tcg_out_st16u(s, COND_AL, args[0], args[1], args[2]); + tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); break; case INDEX_op_st_i32: tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); @@ -1427,6 +1515,9 @@ static inline void tcg_out_op(TCGContext *s, int opc, case INDEX_op_and_i32: c = ARITH_AND; goto gen_arith; + case INDEX_op_andc_i32: + c = ARITH_BIC; + goto gen_arith; case INDEX_op_or_i32: c = ARITH_ORR; goto gen_arith; @@ -1466,16 +1557,6 @@ static inline void tcg_out_op(TCGContext *s, int opc, case INDEX_op_mulu2_i32: tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); break; - case INDEX_op_div2_i32: - tcg_out_div_helper(s, COND_AL, args, - tcg_helper_div_i64, tcg_helper_rem_i64, - SHIFT_IMM_ASR(31)); - break; - case INDEX_op_divu2_i32: - tcg_out_div_helper(s, COND_AL, args, - tcg_helper_divu_i64, tcg_helper_remu_i64, - SHIFT_IMM_LSR(31)); - break; /* XXX: Perhaps args[2] & 0x1f is wrong */ case INDEX_op_shl_i32: c = const_args[2] ? @@ -1488,14 +1569,38 @@ static inline void tcg_out_op(TCGContext *s, int opc, case INDEX_op_sar_i32: c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); + goto gen_shift32; + case INDEX_op_rotr_i32: + c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : + SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); /* Fall through. */ gen_shift32: tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); break; + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + ((0x20 - args[2]) & 0x1f) ? 
+ SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : + SHIFT_IMM_LSL(0)); + } else { + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_R8, args[1], 0x20); + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], + SHIFT_REG_ROR(TCG_REG_R8)); + } + break; + case INDEX_op_brcond_i32: - tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, - args[0], args[1], SHIFT_IMM_LSL(0)); + if (const_args[1]) { + int rot; + rot = encode_imm(args[1]); + tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, + args[0], rotl(args[1], rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, + args[0], args[1], SHIFT_IMM_LSL(0)); + } tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]); break; case INDEX_op_brcond2_i32: @@ -1513,60 +1618,80 @@ static inline void tcg_out_op(TCGContext *s, int opc, args[0], args[2], SHIFT_IMM_LSL(0)); tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]); break; + case INDEX_op_setcond_i32: + if (const_args[2]) { + int rot; + rot = encode_imm(args[2]); + tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, + args[1], rotl(args[2], rot) | (rot << 7)); + } else { + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, + args[1], args[2], SHIFT_IMM_LSL(0)); + } + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], + ARITH_MOV, args[0], 0, 0); + break; + case INDEX_op_setcond2_i32: + /* See brcond2_i32 comment */ + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, + args[2], args[4], SHIFT_IMM_LSL(0)); + tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, + args[1], args[3], SHIFT_IMM_LSL(0)); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], + ARITH_MOV, args[0], 0, 1); + tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], + ARITH_MOV, args[0], 0, 0); + break; case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, COND_AL, args, 0); + tcg_out_qemu_ld(s, args, 0); break; case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, COND_AL, args, 0 | 4); + tcg_out_qemu_ld(s, args, 0 | 4); break; case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, COND_AL, args, 1); + tcg_out_qemu_ld(s, args, 1); break; case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, COND_AL, args, 1 | 4); + tcg_out_qemu_ld(s, args, 1 | 4); break; - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, COND_AL, args, 2); + case INDEX_op_qemu_ld32: + tcg_out_qemu_ld(s, args, 2); break; case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, COND_AL, args, 3); + tcg_out_qemu_ld(s, args, 3); break; case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, COND_AL, args, 0); + tcg_out_qemu_st(s, args, 0); break; case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, COND_AL, args, 1); + tcg_out_qemu_st(s, args, 1); break; case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, COND_AL, args, 2); + tcg_out_qemu_st(s, args, 2); break; case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, COND_AL, args, 3); + tcg_out_qemu_st(s, args, 3); + break; + + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, COND_AL, args[0], args[1]); break; case INDEX_op_ext8s_i32: -#ifdef __ARM_ARCH_7A__ - /* sxtb */ - tcg_out32(s, 0xe6af0070 | (args[0] << 12) | args[1]); -#else - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - args[0], 0, args[1], SHIFT_IMM_LSL(24)); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - args[0], 0, args[0], SHIFT_IMM_ASR(24)); -#endif + tcg_out_ext8s(s, COND_AL, args[0], args[1]); break; case INDEX_op_ext16s_i32: -#ifdef __ARM_ARCH_7A__ - /* sxth */ - tcg_out32(s, 0xe6bf0070 | (args[0] << 12) | args[1]); -#else - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - 
args[0], 0, args[1], SHIFT_IMM_LSL(16)); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, - args[0], 0, args[0], SHIFT_IMM_ASR(16)); -#endif + tcg_out_ext16s(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext16u_i32: + tcg_out_ext16u(s, COND_AL, args[0], args[1]); break; default: @@ -1598,9 +1723,8 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_sub_i32, { "r", "r", "rI" } }, { INDEX_op_mul_i32, { "r", "r", "r" } }, { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_div2_i32, { "r", "r", "r", "1", "2" } }, - { INDEX_op_divu2_i32, { "r", "r", "r", "1", "2" } }, { INDEX_op_and_i32, { "r", "r", "rI" } }, + { INDEX_op_andc_i32, { "r", "r", "rI" } }, { INDEX_op_or_i32, { "r", "r", "rI" } }, { INDEX_op_xor_i32, { "r", "r", "rI" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -1609,50 +1733,75 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_shl_i32, { "r", "r", "ri" } }, { INDEX_op_shr_i32, { "r", "r", "ri" } }, { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, - { INDEX_op_brcond_i32, { "r", "r" } }, + { INDEX_op_brcond_i32, { "r", "rI" } }, + { INDEX_op_setcond_i32, { "r", "r", "rI" } }, /* TODO: "r", "r", "r", "r", "ri", "ri" */ { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } }, { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } }, { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } }, + +#if TARGET_LONG_BITS == 32 + { INDEX_op_qemu_ld8u, { "r", "l" } }, + { INDEX_op_qemu_ld8s, { "r", "l" } }, + { INDEX_op_qemu_ld16u, { "r", "l" } }, + { INDEX_op_qemu_ld16s, { "r", "l" } }, + { INDEX_op_qemu_ld32, { "r", "l" } }, + { INDEX_op_qemu_ld64, { "L", "L", "l" } }, + + { INDEX_op_qemu_st8, { "s", "s" } }, + { INDEX_op_qemu_st16, { "s", "s" } }, + { INDEX_op_qemu_st32, { "s", "s" } }, + { INDEX_op_qemu_st64, { "S", "S", "s" } }, +#else + { INDEX_op_qemu_ld8u, { "r", "l", "l" } }, + { INDEX_op_qemu_ld8s, { "r", "l", "l" } }, + { INDEX_op_qemu_ld16u, { "r", "l", "l" } }, + { INDEX_op_qemu_ld16s, { "r", "l", "l" } }, + { INDEX_op_qemu_ld32, { "r", "l", "l" } }, + { INDEX_op_qemu_ld64, { "L", "L", "l", "l" } }, + + { INDEX_op_qemu_st8, { "s", "s", "s" } }, + { INDEX_op_qemu_st16, { "s", "s", "s" } }, + { INDEX_op_qemu_st32, { "s", "s", "s" } }, + { INDEX_op_qemu_st64, { "S", "S", "s", "s" } }, +#endif - { INDEX_op_qemu_ld8u, { "r", "x", "X" } }, - { INDEX_op_qemu_ld8s, { "r", "x", "X" } }, - { INDEX_op_qemu_ld16u, { "r", "x", "X" } }, - { INDEX_op_qemu_ld16s, { "r", "x", "X" } }, - { INDEX_op_qemu_ld32u, { "r", "x", "X" } }, - { INDEX_op_qemu_ld64, { "d", "r", "x", "X" } }, - - { INDEX_op_qemu_st8, { "x", "x", "X" } }, - { INDEX_op_qemu_st16, { "x", "x", "X" } }, - { INDEX_op_qemu_st32, { "x", "x", "X" } }, - { INDEX_op_qemu_st64, { "x", "D", "x", "X" } }, + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, { INDEX_op_ext8s_i32, { "r", "r" } }, { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, { -1 }, }; -void tcg_target_init(TCGContext *s) +static void tcg_target_init(TCGContext *s) { +#if !defined(CONFIG_USER_ONLY) /* fail safe */ if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) tcg_abort(); +#endif - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, - ((2 << TCG_REG_R14) - 1) & ~(1 << TCG_REG_R8)); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, - ((2 << TCG_REG_R3) - 1) | - (1 << 
TCG_REG_R12) | (1 << TCG_REG_R14)); + (1 << TCG_REG_R0) | + (1 << TCG_REG_R1) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | + (1 << TCG_REG_R12) | + (1 << TCG_REG_R14)); tcg_regset_clear(s->reserved_regs); -#ifdef SAVE_LR - tcg_regset_set_reg(s->reserved_regs, TCG_REG_R14); -#endif tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); tcg_regset_set_reg(s->reserved_regs, TCG_REG_R8); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); tcg_add_target_add_op_defs(arm_op_defs); } @@ -1684,7 +1833,7 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) } } -static inline void tcg_out_mov(TCGContext *s, int ret, int arg) +static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) { tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); } @@ -1695,14 +1844,17 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi32(s, COND_AL, ret, arg); } -void tcg_target_qemu_prologue(TCGContext *s) +static void tcg_target_qemu_prologue(TCGContext *s) { - /* stmdb sp!, { r9 - r11, lr } */ - tcg_out32(s, (COND_AL << 28) | 0x092d4e00); + /* There is no need to save r7, it is used to store the address + of the env structure and is not modified by GCC. */ + + /* stmdb sp!, { r4 - r6, r8 - r11, lr } */ + tcg_out32(s, (COND_AL << 28) | 0x092d4f70); tcg_out_bx(s, COND_AL, TCG_REG_R0); tb_ret_addr = s->code_ptr; - /* ldmia sp!, { r9 - r11, pc } */ - tcg_out32(s, (COND_AL << 28) | 0x08bd8e00); + /* ldmia sp!, { r4 - r6, r8 - r11, pc } */ + tcg_out32(s, (COND_AL << 28) | 0x08bd8f70); } diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 5eac7bf..d8d7d94 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -26,14 +26,6 @@ #define TCG_TARGET_REG_BITS 32 #undef TCG_TARGET_WORDS_BIGENDIAN -#undef TCG_TARGET_HAS_div_i32 -#undef TCG_TARGET_HAS_div_i64 -#undef TCG_TARGET_HAS_bswap32_i32 -#define TCG_TARGET_HAS_ext8s_i32 -#define TCG_TARGET_HAS_ext16s_i32 -#define TCG_TARGET_HAS_neg_i32 -#undef TCG_TARGET_HAS_neg_i64 -#define TCG_TARGET_HAS_not_i32 #undef TCG_TARGET_STACK_GROWSUP enum { @@ -52,38 +44,40 @@ enum { TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, + TCG_REG_PC, }; -#define TCG_TARGET_NB_REGS 15 +#define TCG_TARGET_NB_REGS 16 #define TCG_CT_CONST_ARM 0x100 /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R13 #define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_CALL_STACK_OFFSET 0 /* optional instructions */ -// #define TCG_TARGET_HAS_div_i32 -// #define TCG_TARGET_HAS_rot_i32 -// #define TCG_TARGET_HAS_ext8s_i32 -// #define TCG_TARGET_HAS_ext16s_i32 -// #define TCG_TARGET_HAS_ext8u_i32 -// #define TCG_TARGET_HAS_ext16u_i32 -// #define TCG_TARGET_HAS_bswap16_i32 -// #define TCG_TARGET_HAS_bswap32_i32 -// #define TCG_TARGET_HAS_not_i32 -// #define TCG_TARGET_HAS_neg_i32 -// #define TCG_TARGET_HAS_andc_i32 +#define TCG_TARGET_HAS_ext8s_i32 +#define TCG_TARGET_HAS_ext16s_i32 +#undef TCG_TARGET_HAS_ext8u_i32 /* and r0, r1, #0xff */ +#define TCG_TARGET_HAS_ext16u_i32 +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 +#define TCG_TARGET_HAS_not_i32 +#define TCG_TARGET_HAS_neg_i32 +#define TCG_TARGET_HAS_rot_i32 +#define TCG_TARGET_HAS_andc_i32 // #define TCG_TARGET_HAS_orc_i32 +// #define TCG_TARGET_HAS_eqv_i32 +// #define TCG_TARGET_HAS_nand_i32 +// #define TCG_TARGET_HAS_nor_i32 #define TCG_TARGET_HAS_GUEST_BASE enum { /* Note: must be synced with dyngen-exec.h */ TCG_AREG0 = TCG_REG_R7, - TCG_AREG1 = TCG_REG_R4, - TCG_AREG2 = TCG_REG_R5, }; 
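The magic words in the new prologue/epilogue above decode straightforwardly: 0x092d4f70 and 0x08bd8f70 are the STMDB/LDMIA opcodes with sp as the base register, writeback enabled, and a 16-bit register bitmap in the low half (bits 4-6 for r4-r6, bits 8-11 for r8-r11, bit 14 for lr, bit 15 for pc). A minimal standalone C sketch, not part of the patch (the opcode field values are taken from the ARM architecture manual, and COND_AL is assumed to be 0xe), that reconstructs both encodings:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Callee-saved registers pushed by the TCG prologue. */
        uint32_t save = (7u << 4) | (0xfu << 8);   /* r4-r6, r8-r11 */

        /* stmdb sp!, {regs, lr}: base opcode 0x092d0000
           (P=1, U=0, W=1, L=0, Rn=13). */
        uint32_t stmdb = (0xeu << 28) | 0x092d0000 | save | (1u << 14);
        /* ldmia sp!, {regs, pc}: base opcode 0x08bd0000
           (P=0, U=1, W=1, L=1, Rn=13). */
        uint32_t ldmia = (0xeu << 28) | 0x08bd0000 | save | (1u << 15);

        assert(stmdb == 0xe92d4f70u);   /* (COND_AL << 28) | 0x092d4f70 */
        assert(ldmia == 0xe8bd8f70u);   /* (COND_AL << 28) | 0x08bd8f70 */
        printf("stmdb %08x, ldmia %08x\n", (unsigned)stmdb, (unsigned)ldmia);
        return 0;
    }

Popping into pc instead of lr is what turns the epilogue load into the return branch, which is why tb_ret_addr can point directly at it.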
static inline void flush_icache_range(unsigned long start, unsigned long stop) diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c index 4677971..7f4653e 100644 --- a/tcg/hppa/tcg-target.c +++ b/tcg/hppa/tcg-target.c @@ -24,41 +24,22 @@ #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%r0", - "%r1", - "%rp", - "%r3", - "%r4", - "%r5", - "%r6", - "%r7", - "%r8", - "%r9", - "%r10", - "%r11", - "%r12", - "%r13", - "%r14", - "%r15", - "%r16", - "%r17", - "%r18", - "%r19", - "%r20", - "%r21", - "%r22", - "%r23", - "%r24", - "%r25", - "%r26", - "%dp", - "%ret0", - "%ret1", - "%sp", - "%r31", + "%r0", "%r1", "%rp", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", + "%r24", "%r25", "%r26", "%dp", "%ret0", "%ret1", "%sp", "%r31", }; #endif +/* This is an 8 byte temp slot in the stack frame. */ +#define STACK_TEMP_OFS -16 + +#ifdef CONFIG_USE_GUEST_BASE +#define TCG_GUEST_BASE_REG TCG_REG_R16 +#else +#define TCG_GUEST_BASE_REG TCG_REG_R0 +#endif + static const int tcg_target_reg_alloc_order[] = { TCG_REG_R4, TCG_REG_R5, @@ -75,6 +56,14 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R14, TCG_REG_R15, TCG_REG_R16, + + TCG_REG_R26, + TCG_REG_R25, + TCG_REG_R24, + TCG_REG_R23, + + TCG_REG_RET0, + TCG_REG_RET1, }; static const int tcg_target_call_iarg_regs[4] = { @@ -89,16 +78,101 @@ static const int tcg_target_call_oarg_regs[2] = { TCG_REG_RET1, }; +/* True iff val fits a signed field of width BITS. */ +static inline int check_fit_tl(tcg_target_long val, unsigned int bits) +{ + return (val << ((sizeof(tcg_target_long) * 8 - bits)) + >> (sizeof(tcg_target_long) * 8 - bits)) == val; +} + +/* True iff depi can be used to compute (reg | MASK). + Accept a bit pattern like: + 0....01....1 + 1....10....0 + 0..01..10..0 + Copied from gcc sources. */ +static inline int or_mask_p(tcg_target_ulong mask) +{ + if (mask == 0 || mask == -1) { + return 0; + } + mask += mask & -mask; + return (mask & (mask - 1)) == 0; +} + +/* True iff depi or extru can be used to compute (reg & mask). + Accept a bit pattern like these: + 0....01....1 + 1....10....0 + 1..10..01..1 + Copied from gcc sources. */ +static inline int and_mask_p(tcg_target_ulong mask) +{ + return or_mask_p(~mask); +} + +static int low_sign_ext(int val, int len) +{ + return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1)); +} + +static int reassemble_12(int as12) +{ + return (((as12 & 0x800) >> 11) | + ((as12 & 0x400) >> 8) | + ((as12 & 0x3ff) << 3)); +} + +static int reassemble_17(int as17) +{ + return (((as17 & 0x10000) >> 16) | + ((as17 & 0x0f800) << 5) | + ((as17 & 0x00400) >> 8) | + ((as17 & 0x003ff) << 3)); +} + +static int reassemble_21(int as21) +{ + return (((as21 & 0x100000) >> 20) | + ((as21 & 0x0ffe00) >> 8) | + ((as21 & 0x000180) << 7) | + ((as21 & 0x00007c) << 14) | + ((as21 & 0x000003) << 12)); +} + +/* ??? Bizzarely, there is no PCREL12F relocation type. I guess all + such relocations are simply fully handled by the assembler. */ +#define R_PARISC_PCREL12F R_PARISC_NONE + static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { + uint32_t *insn_ptr = (uint32_t *)code_ptr; + uint32_t insn = *insn_ptr; + tcg_target_long pcrel; + + value += addend; + pcrel = (value - ((tcg_target_long)code_ptr + 8)) >> 2; + switch (type) { + case R_PARISC_PCREL12F: + assert(check_fit_tl(pcrel, 12)); + /* ??? 
We assume all patches are forward. See tcg_out_brcond + re setting the NUL bit on the branch and eliding the nop. */ + assert(pcrel >= 0); + insn &= ~0x1ffdu; + insn |= reassemble_12(pcrel); + break; case R_PARISC_PCREL17F: - hppa_patch17f((uint32_t *)code_ptr, value, addend); + assert(check_fit_tl(pcrel, 17)); + insn &= ~0x1f1ffdu; + insn |= reassemble_17(pcrel); break; default: tcg_abort(); } + + *insn_ptr = insn; } /* maximum number of register used for input function arguments */ @@ -126,6 +200,24 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) tcg_regset_reset_reg(ct->u.regs, TCG_REG_R24); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R23); break; + case 'Z': + ct->ct |= TCG_CT_CONST_0; + break; + case 'I': + ct->ct |= TCG_CT_CONST_S11; + break; + case 'J': + ct->ct |= TCG_CT_CONST_S5; + break; + case 'K': + ct->ct |= TCG_CT_CONST_MS11; + break; + case 'M': + ct->ct |= TCG_CT_CONST_AND; + break; + case 'O': + ct->ct |= TCG_CT_CONST_OR; + break; default: return -1; } @@ -135,15 +227,25 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) } /* test if a constant matches the constraint */ -static inline int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) +static int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) { - int ct; - - ct = arg_ct->ct; - - /* TODO */ - + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } else if (ct & TCG_CT_CONST_0) { + return val == 0; + } else if (ct & TCG_CT_CONST_S5) { + return check_fit_tl(val, 5); + } else if (ct & TCG_CT_CONST_S11) { + return check_fit_tl(val, 11); + } else if (ct & TCG_CT_CONST_MS11) { + return check_fit_tl(-val, 11); + } else if (ct & TCG_CT_CONST_AND) { + return and_mask_p(val); + } else if (ct & TCG_CT_CONST_OR) { + return or_mask_p(val); + } return 0; } @@ -163,692 +265,1197 @@ static inline int tcg_target_const_match(tcg_target_long val, #define INSN_SHDEP_CP(x) ((31 - (x)) << 5) #define INSN_SHDEP_P(x) ((x) << 5) #define INSN_COND(x) ((x) << 13) +#define INSN_IM11(x) low_sign_ext(x, 11) +#define INSN_IM14(x) low_sign_ext(x, 14) +#define INSN_IM5(x) (low_sign_ext(x, 5) << 16) + +#define COND_NEVER 0 +#define COND_EQ 1 +#define COND_LT 2 +#define COND_LE 3 +#define COND_LTU 4 +#define COND_LEU 5 +#define COND_SV 6 +#define COND_OD 7 +#define COND_FALSE 8 + +#define INSN_ADD (INSN_OP(0x02) | INSN_EXT6(0x18)) +#define INSN_ADDC (INSN_OP(0x02) | INSN_EXT6(0x1c)) +#define INSN_ADDI (INSN_OP(0x2d)) +#define INSN_ADDIL (INSN_OP(0x0a)) +#define INSN_ADDL (INSN_OP(0x02) | INSN_EXT6(0x28)) +#define INSN_AND (INSN_OP(0x02) | INSN_EXT6(0x08)) +#define INSN_ANDCM (INSN_OP(0x02) | INSN_EXT6(0x00)) +#define INSN_COMCLR (INSN_OP(0x02) | INSN_EXT6(0x22)) +#define INSN_COMICLR (INSN_OP(0x24)) +#define INSN_DEP (INSN_OP(0x35) | INSN_EXT3SH(3)) +#define INSN_DEPI (INSN_OP(0x35) | INSN_EXT3SH(7)) +#define INSN_EXTRS (INSN_OP(0x34) | INSN_EXT3SH(7)) +#define INSN_EXTRU (INSN_OP(0x34) | INSN_EXT3SH(6)) +#define INSN_LDIL (INSN_OP(0x08)) +#define INSN_LDO (INSN_OP(0x0d)) +#define INSN_MTCTL (INSN_OP(0x00) | INSN_EXT8B(0xc2)) +#define INSN_OR (INSN_OP(0x02) | INSN_EXT6(0x09)) +#define INSN_SHD (INSN_OP(0x34) | INSN_EXT3SH(2)) +#define INSN_SUB (INSN_OP(0x02) | INSN_EXT6(0x10)) +#define INSN_SUBB (INSN_OP(0x02) | INSN_EXT6(0x14)) +#define INSN_SUBI (INSN_OP(0x25)) +#define INSN_VEXTRS (INSN_OP(0x34) | INSN_EXT3SH(5)) +#define INSN_VEXTRU (INSN_OP(0x34) | INSN_EXT3SH(4)) +#define INSN_VSHD (INSN_OP(0x34) | INSN_EXT3SH(0)) 
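(The INSN_IM5/IM11/IM14 wrappers earlier in this hunk feed low_sign_ext because PA-RISC stores short immediates in "low sign extension" form: the low bits of the value occupy the upper positions of the field, and the sign bit moves into the field's least significant bit. A standalone sketch, not part of the patch, with the helper copied verbatim from it and the expected field values worked out by hand:

    #include <assert.h>
    #include <stdio.h>

    /* Copied from the patch; like the original, this relies on
       left/right shifts of negative ints behaving as on typical
       two's-complement compilers. */
    static int low_sign_ext(int val, int len)
    {
        return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1));
    }

    int main(void)
    {
        assert(low_sign_ext(5, 14)  == 0x000a);  /* positive: value << 1, sign bit 0 */
        assert(low_sign_ext(-1, 14) == 0x3fff);  /* negative: all field bits set */
        assert(low_sign_ext(-4, 11) == 0x07f9);  /* -4 -> 0x7f8 | sign bit */
        printf("ok\n");
        return 0;
    }

The reassemble_12/17/21 helpers above perform the same kind of bit scrambling for branch displacements and the LDIL immediate.)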
+#define INSN_XOR (INSN_OP(0x02) | INSN_EXT6(0x0a)) +#define INSN_ZDEP (INSN_OP(0x35) | INSN_EXT3SH(2)) +#define INSN_ZVDEP (INSN_OP(0x35) | INSN_EXT3SH(0)) + +#define INSN_BL (INSN_OP(0x3a) | INSN_EXT3BR(0)) +#define INSN_BL_N (INSN_OP(0x3a) | INSN_EXT3BR(0) | 2) +#define INSN_BLR (INSN_OP(0x3a) | INSN_EXT3BR(2)) +#define INSN_BV (INSN_OP(0x3a) | INSN_EXT3BR(6)) +#define INSN_BV_N (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2) +#define INSN_BLE_SR4 (INSN_OP(0x39) | (1 << 13)) + +#define INSN_LDB (INSN_OP(0x10)) +#define INSN_LDH (INSN_OP(0x11)) +#define INSN_LDW (INSN_OP(0x12)) +#define INSN_LDWM (INSN_OP(0x13)) +#define INSN_FLDDS (INSN_OP(0x0b) | INSN_EXT4(0) | (1 << 12)) + +#define INSN_LDBX (INSN_OP(0x03) | INSN_EXT4(0)) +#define INSN_LDHX (INSN_OP(0x03) | INSN_EXT4(1)) +#define INSN_LDWX (INSN_OP(0x03) | INSN_EXT4(2)) + +#define INSN_STB (INSN_OP(0x18)) +#define INSN_STH (INSN_OP(0x19)) +#define INSN_STW (INSN_OP(0x1a)) +#define INSN_STWM (INSN_OP(0x1b)) +#define INSN_FSTDS (INSN_OP(0x0b) | INSN_EXT4(8) | (1 << 12)) + +#define INSN_COMBT (INSN_OP(0x20)) +#define INSN_COMBF (INSN_OP(0x22)) +#define INSN_COMIBT (INSN_OP(0x21)) +#define INSN_COMIBF (INSN_OP(0x23)) + +/* supplied by libgcc */ +extern void *__canonicalize_funcptr_for_compare(void *); + +static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) +{ + /* PA1.1 defines COPY as OR r,0,t; PA2.0 defines COPY as LDO 0(r),t + but hppa-dis.c is unaware of this definition */ + if (ret != arg) { + tcg_out32(s, INSN_OR | INSN_T(ret) | INSN_R1(arg) + | INSN_R2(TCG_REG_R0)); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg) +{ + if (check_fit_tl(arg, 14)) { + tcg_out32(s, INSN_LDO | INSN_R1(ret) + | INSN_R2(TCG_REG_R0) | INSN_IM14(arg)); + } else { + uint32_t hi, lo; + hi = arg >> 11; + lo = arg & 0x7ff; + + tcg_out32(s, INSN_LDIL | INSN_R2(ret) | reassemble_21(hi)); + if (lo) { + tcg_out32(s, INSN_LDO | INSN_R1(ret) + | INSN_R2(ret) | INSN_IM14(lo)); + } + } +} + +static void tcg_out_ldst(TCGContext *s, int ret, int addr, + tcg_target_long offset, int op) +{ + if (!check_fit_tl(offset, 14)) { + uint32_t hi, lo, op; + + hi = offset >> 11; + lo = offset & 0x7ff; + + if (addr == TCG_REG_R0) { + op = INSN_LDIL | INSN_R2(TCG_REG_R1); + } else { + op = INSN_ADDIL | INSN_R2(addr); + } + tcg_out32(s, op | reassemble_21(hi)); -#define COND_NEVER 0 -#define COND_EQUAL 1 -#define COND_LT 2 -#define COND_LTEQ 3 -#define COND_LTU 4 -#define COND_LTUEQ 5 -#define COND_SV 6 -#define COND_OD 7 + addr = TCG_REG_R1; + offset = lo; + } + + if (ret != addr || offset != 0 || op != INSN_LDO) { + tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | INSN_IM14(offset)); + } +} +/* This function is required by tcg.c. */ +static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, + int arg1, tcg_target_long arg2) +{ + tcg_out_ldst(s, ret, arg1, arg2, INSN_LDW); +} -/* Logical ADD */ -#define ARITH_ADD (INSN_OP(0x02) | INSN_EXT6(0x28)) -#define ARITH_AND (INSN_OP(0x02) | INSN_EXT6(0x08)) -#define ARITH_OR (INSN_OP(0x02) | INSN_EXT6(0x09)) -#define ARITH_XOR (INSN_OP(0x02) | INSN_EXT6(0x0a)) -#define ARITH_SUB (INSN_OP(0x02) | INSN_EXT6(0x10)) +/* This function is required by tcg.c. 
*/ +static inline void tcg_out_st(TCGContext *s, TCGType type, int ret, + int arg1, tcg_target_long arg2) +{ + tcg_out_ldst(s, ret, arg1, arg2, INSN_STW); +} -#define SHD (INSN_OP(0x34) | INSN_EXT3SH(2)) -#define VSHD (INSN_OP(0x34) | INSN_EXT3SH(0)) -#define DEP (INSN_OP(0x35) | INSN_EXT3SH(3)) -#define ZDEP (INSN_OP(0x35) | INSN_EXT3SH(2)) -#define ZVDEP (INSN_OP(0x35) | INSN_EXT3SH(0)) -#define EXTRU (INSN_OP(0x34) | INSN_EXT3SH(6)) -#define EXTRS (INSN_OP(0x34) | INSN_EXT3SH(7)) -#define VEXTRS (INSN_OP(0x34) | INSN_EXT3SH(5)) +static void tcg_out_ldst_index(TCGContext *s, int data, + int base, int index, int op) +{ + tcg_out32(s, op | INSN_T(data) | INSN_R1(index) | INSN_R2(base)); +} -#define SUBI (INSN_OP(0x25)) -#define MTCTL (INSN_OP(0x00) | INSN_EXT8B(0xc2)) +static inline void tcg_out_addi2(TCGContext *s, int ret, int arg1, + tcg_target_long val) +{ + tcg_out_ldst(s, ret, arg1, val, INSN_LDO); +} -#define BL (INSN_OP(0x3a) | INSN_EXT3BR(0)) -#define BLE_SR4 (INSN_OP(0x39) | (1 << 13)) -#define BV (INSN_OP(0x3a) | INSN_EXT3BR(6)) -#define BV_N (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2) -#define LDIL (INSN_OP(0x08)) -#define LDO (INSN_OP(0x0d)) +/* This function is required by tcg.c. */ +static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ + tcg_out_addi2(s, reg, reg, val); +} -#define LDB (INSN_OP(0x10)) -#define LDH (INSN_OP(0x11)) -#define LDW (INSN_OP(0x12)) -#define LDWM (INSN_OP(0x13)) +static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int op) +{ + tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2)); +} -#define STB (INSN_OP(0x18)) -#define STH (INSN_OP(0x19)) -#define STW (INSN_OP(0x1a)) -#define STWM (INSN_OP(0x1b)) +static inline void tcg_out_arithi(TCGContext *s, int t, int r1, + tcg_target_long val, int op) +{ + assert(check_fit_tl(val, 11)); + tcg_out32(s, op | INSN_R1(t) | INSN_R2(r1) | INSN_IM11(val)); +} -#define COMBT (INSN_OP(0x20)) -#define COMBF (INSN_OP(0x22)) +static inline void tcg_out_nop(TCGContext *s) +{ + tcg_out_arith(s, TCG_REG_R0, TCG_REG_R0, TCG_REG_R0, INSN_OR); +} -static int lowsignext(uint32_t val, int start, int length) +static inline void tcg_out_mtctl_sar(TCGContext *s, int arg) { - return (((val << 1) & ~(~0 << length)) | - ((val >> (length - 1)) & 1)) << start; + tcg_out32(s, INSN_MTCTL | INSN_R2(11) | INSN_R1(arg)); } -static inline void tcg_out_mov(TCGContext *s, int ret, int arg) +/* Extract LEN bits at position OFS from ARG and place in RET. + Note that here the bit ordering is reversed from the PA-RISC + standard, such that the right-most bit is 0. */ +static inline void tcg_out_extr(TCGContext *s, int ret, int arg, + unsigned ofs, unsigned len, int sign) { - /* PA1.1 defines COPY as OR r,0,t */ - tcg_out32(s, ARITH_OR | INSN_T(ret) | INSN_R1(arg) | INSN_R2(TCG_REG_R0)); + assert(ofs < 32 && len <= 32 - ofs); + tcg_out32(s, (sign ? INSN_EXTRS : INSN_EXTRU) + | INSN_R1(ret) | INSN_R2(arg) + | INSN_SHDEP_P(31 - ofs) | INSN_DEP_LEN(len)); +} - /* PA2.0 defines COPY as LDO 0(r),t - * but hppa-dis.c is unaware of this definition */ - /* tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(arg) | reassemble_14(0)); */ +/* Likewise with OFS interpreted little-endian. 
*/ +static inline void tcg_out_dep(TCGContext *s, int ret, int arg, + unsigned ofs, unsigned len) +{ + assert(ofs < 32 && len <= 32 - ofs); + tcg_out32(s, INSN_DEP | INSN_R2(ret) | INSN_R1(arg) + | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len)); } -static inline void tcg_out_movi(TCGContext *s, TCGType type, - int ret, tcg_target_long arg) +static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo, + unsigned count) { - if (arg == (arg & 0x1fff)) { - tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(TCG_REG_R0) | - reassemble_14(arg)); + assert(count < 32); + tcg_out32(s, INSN_SHD | INSN_R1(hi) | INSN_R2(lo) | INSN_T(ret) + | INSN_SHDEP_CP(count)); +} + +static void tcg_out_vshd(TCGContext *s, int ret, int hi, int lo, int creg) +{ + tcg_out_mtctl_sar(s, creg); + tcg_out32(s, INSN_VSHD | INSN_T(ret) | INSN_R1(hi) | INSN_R2(lo)); +} + +static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m) +{ + int bs0, bs1; + + /* Note that the argument is constrained to match or_mask_p. */ + for (bs0 = 0; bs0 < 32; bs0++) { + if ((m & (1u << bs0)) != 0) { + break; + } + } + for (bs1 = bs0; bs1 < 32; bs1++) { + if ((m & (1u << bs1)) == 0) { + break; + } + } + assert(bs1 == 32 || (1ul << bs1) > m); + + tcg_out_mov(s, TCG_TYPE_I32, ret, arg); + tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(-1) + | INSN_SHDEP_CP(31 - bs0) | INSN_DEP_LEN(bs1 - bs0)); +} + +static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m) +{ + int ls0, ls1, ms0; + + /* Note that the argument is constrained to match and_mask_p. */ + for (ls0 = 0; ls0 < 32; ls0++) { + if ((m & (1u << ls0)) == 0) { + break; + } + } + for (ls1 = ls0; ls1 < 32; ls1++) { + if ((m & (1u << ls1)) != 0) { + break; + } + } + for (ms0 = ls1; ms0 < 32; ms0++) { + if ((m & (1u << ms0)) == 0) { + break; + } + } + assert (ms0 == 32); + + if (ls1 == 32) { + tcg_out_extr(s, ret, arg, 0, ls0, 0); } else { - tcg_out32(s, LDIL | INSN_R2(ret) | - reassemble_21(lrsel((uint32_t)arg, 0))); - if (arg & 0x7ff) - tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(ret) | - reassemble_14(rrsel((uint32_t)arg, 0))); + tcg_out_mov(s, TCG_TYPE_I32, ret, arg); + tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(0) + | INSN_SHDEP_CP(31 - ls0) | INSN_DEP_LEN(ls1 - ls0)); } } -static inline void tcg_out_ld_raw(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) { - tcg_out32(s, LDIL | INSN_R2(ret) | - reassemble_21(lrsel((uint32_t)arg, 0))); - tcg_out32(s, LDW | INSN_R1(ret) | INSN_R2(ret) | - reassemble_14(rrsel((uint32_t)arg, 0))); + tcg_out_extr(s, ret, arg, 0, 8, 1); } -static inline void tcg_out_ld_ptr(TCGContext *s, int ret, - tcg_target_long arg) +static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) { - tcg_out_ld_raw(s, ret, arg); + tcg_out_extr(s, ret, arg, 0, 16, 1); } -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, - int op) +static void tcg_out_shli(TCGContext *s, int ret, int arg, int count) { - if (offset == (offset & 0xfff)) - tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | - reassemble_14(offset)); - else { - fprintf(stderr, "unimplemented %s with offset %d\n", __func__, offset); - tcg_abort(); - } + count &= 31; + tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg) + | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count)); } -static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, - int arg1, tcg_target_long arg2) +static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg) { - fprintf(stderr, "unimplemented 
%s\n", __func__); - tcg_abort(); + tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI); + tcg_out_mtctl_sar(s, TCG_REG_R20); + tcg_out32(s, INSN_ZVDEP | INSN_R2(ret) | INSN_R1(arg) | INSN_DEP_LEN(32)); } -static inline void tcg_out_st(TCGContext *s, TCGType type, int ret, - int arg1, tcg_target_long arg2) +static void tcg_out_shri(TCGContext *s, int ret, int arg, int count) { - fprintf(stderr, "unimplemented %s\n", __func__); - tcg_abort(); + count &= 31; + tcg_out_extr(s, ret, arg, count, 32 - count, 0); } -static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int op) +static void tcg_out_shr(TCGContext *s, int ret, int arg, int creg) { - tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2)); + tcg_out_vshd(s, ret, TCG_REG_R0, arg, creg); } -static inline void tcg_out_arithi(TCGContext *s, int t, int r1, - tcg_target_long val, int op) +static void tcg_out_sari(TCGContext *s, int ret, int arg, int count) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, val); - tcg_out_arith(s, t, r1, TCG_REG_R20, op); + count &= 31; + tcg_out_extr(s, ret, arg, count, 32 - count, 1); } -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +static void tcg_out_sar(TCGContext *s, int ret, int arg, int creg) { - tcg_out_arithi(s, reg, reg, val, ARITH_ADD); + tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI); + tcg_out_mtctl_sar(s, TCG_REG_R20); + tcg_out32(s, INSN_VEXTRS | INSN_R1(ret) | INSN_R2(arg) | INSN_DEP_LEN(32)); } -static inline void tcg_out_nop(TCGContext *s) +static void tcg_out_rotli(TCGContext *s, int ret, int arg, int count) { - tcg_out32(s, ARITH_OR | INSN_T(TCG_REG_R0) | INSN_R1(TCG_REG_R0) | - INSN_R2(TCG_REG_R0)); + count &= 31; + tcg_out_shd(s, ret, arg, arg, 32 - count); } -static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) { - tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(8)); +static void tcg_out_rotl(TCGContext *s, int ret, int arg, int creg) +{ + tcg_out_arithi(s, TCG_REG_R20, creg, 32, INSN_SUBI); + tcg_out_vshd(s, ret, arg, arg, TCG_REG_R20); } -static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) { - tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(16)); +static void tcg_out_rotri(TCGContext *s, int ret, int arg, int count) +{ + count &= 31; + tcg_out_shd(s, ret, arg, arg, count); } -static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg) { - if(ret != arg) - tcg_out_mov(s, ret, arg); - tcg_out32(s, DEP | INSN_R2(ret) | INSN_R1(ret) | - INSN_SHDEP_CP(15) | INSN_DEP_LEN(8)); - tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(TCG_REG_R0) | - INSN_R2(ret) | INSN_SHDEP_CP(8)); +static void tcg_out_rotr(TCGContext *s, int ret, int arg, int creg) +{ + tcg_out_vshd(s, ret, arg, arg, creg); } -static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp) { - tcg_out32(s, SHD | INSN_T(temp) | INSN_R1(arg) | - INSN_R2(arg) | INSN_SHDEP_CP(16)); - tcg_out32(s, DEP | INSN_R2(temp) | INSN_R1(temp) | - INSN_SHDEP_CP(15) | INSN_DEP_LEN(8)); - tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(arg) | - INSN_R2(temp) | INSN_SHDEP_CP(8)); +static void tcg_out_bswap16(TCGContext *s, int ret, int arg, int sign) +{ + if (ret != arg) { + tcg_out_mov(s, TCG_TYPE_I32, ret, arg); /* arg = xxAB */ + } + tcg_out_dep(s, ret, ret, 16, 8); /* ret = xBAB */ + tcg_out_extr(s, ret, ret, 8, 16, sign); /* ret = ..BA */ } -static inline void tcg_out_call(TCGContext *s, void *func) +static void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp) { - uint32_t 
val = (uint32_t)__canonicalize_funcptr_for_compare(func); - tcg_out32(s, LDIL | INSN_R2(TCG_REG_R20) | - reassemble_21(lrsel(val, 0))); - tcg_out32(s, BLE_SR4 | INSN_R2(TCG_REG_R20) | - reassemble_17(rrsel(val, 0) >> 2)); - tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31); + /* arg = ABCD */ + tcg_out_rotri(s, temp, arg, 16); /* temp = CDAB */ + tcg_out_dep(s, temp, temp, 16, 8); /* temp = CBAB */ + tcg_out_shd(s, ret, arg, temp, 8); /* ret = DCBA */ } -#if defined(CONFIG_SOFTMMU) +static void tcg_out_call(TCGContext *s, void *func) +{ + tcg_target_long val, hi, lo, disp; -#include "../../softmmu_defs.h" + val = (uint32_t)__canonicalize_funcptr_for_compare(func); + disp = (val - ((tcg_target_long)s->code_ptr + 8)) >> 2; -static void *qemu_ld_helpers[4] = { - __ldb_mmu, - __ldw_mmu, - __ldl_mmu, - __ldq_mmu, -}; + if (check_fit_tl(disp, 17)) { + tcg_out32(s, INSN_BL_N | INSN_R2(TCG_REG_RP) | reassemble_17(disp)); + } else { + hi = val >> 11; + lo = val & 0x7ff; -static void *qemu_st_helpers[4] = { - __stb_mmu, - __stw_mmu, - __stl_mmu, - __stq_mmu, -}; -#endif + tcg_out32(s, INSN_LDIL | INSN_R2(TCG_REG_R20) | reassemble_21(hi)); + tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R20) + | reassemble_17(lo >> 2)); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_RP, TCG_REG_R31); + } +} -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_xmpyu(TCGContext *s, int retl, int reth, + int arg1, int arg2) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap; -#if defined(CONFIG_SOFTMMU) - uint32_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint32_t *label3_ptr; -#endif - int addr_reg2; -#endif + /* Store both words into the stack for copy to the FPU. */ + tcg_out_ldst(s, arg1, TCG_REG_SP, STACK_TEMP_OFS, INSN_STW); + tcg_out_ldst(s, arg2, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_STW); + + /* Load both words into the FPU at the same time. We get away + with this because we can address the left and right half of the + FPU registers individually once loaded. */ + /* fldds stack_temp(sp),fr22 */ + tcg_out32(s, INSN_FLDDS | INSN_R2(TCG_REG_SP) + | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22)); + + /* xmpyu fr22r,fr22,fr22 */ + tcg_out32(s, 0x3ad64796); + + /* Store the 64-bit result back into the stack. */ + /* fstds stack_temp(sp),fr22 */ + tcg_out32(s, INSN_FSTDS | INSN_R2(TCG_REG_SP) + | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22)); + + /* Load the pieces of the result that the caller requested. */ + if (reth) { + tcg_out_ldst(s, reth, TCG_REG_SP, STACK_TEMP_OFS, INSN_LDW); + } + if (retl) { + tcg_out_ldst(s, retl, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_LDW); + } +} - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; /* suppress warning */ - addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif - mem_index = *args; - s_bits = opc & 3; +static void tcg_out_add2(TCGContext *s, int destl, int desth, + int al, int ah, int bl, int bh, int blconst) +{ + int tmp = (destl == ah || destl == bh ? 
TCG_REG_R20 : destl); - r0 = TCG_REG_R26; - r1 = TCG_REG_R25; + if (blconst) { + tcg_out_arithi(s, tmp, al, bl, INSN_ADDI); + } else { + tcg_out_arith(s, tmp, al, bl, INSN_ADD); + } + tcg_out_arith(s, desth, ah, bh, INSN_ADDC); -#if defined(CONFIG_SOFTMMU) - tcg_out_mov(s, r1, addr_reg); + tcg_out_mov(s, TCG_TYPE_I32, destl, tmp); +} - tcg_out_mov(s, r0, addr_reg); +static void tcg_out_sub2(TCGContext *s, int destl, int desth, int al, int ah, + int bl, int bh, int alconst, int blconst) +{ + int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl); - tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) | - INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); + if (alconst) { + if (blconst) { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, bl); + bl = TCG_REG_R20; + } + tcg_out_arithi(s, tmp, bl, al, INSN_SUBI); + } else if (blconst) { + tcg_out_arithi(s, tmp, al, -bl, INSN_ADDI); + } else { + tcg_out_arith(s, tmp, al, bl, INSN_SUB); + } + tcg_out_arith(s, desth, ah, bh, INSN_SUBB); - tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), - ARITH_AND); + tcg_out_mov(s, TCG_TYPE_I32, destl, tmp); +} - tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, - ARITH_AND); +static void tcg_out_branch(TCGContext *s, int label_index, int nul) +{ + TCGLabel *l = &s->labels[label_index]; + uint32_t op = nul ? INSN_BL_N : INSN_BL; - tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD); - tcg_out_arithi(s, r1, r1, - offsetof(CPUState, tlb_table[mem_index][0].addr_read), - ARITH_ADD); + if (l->has_value) { + tcg_target_long val = l->u.value; - tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW); + val -= (tcg_target_long)s->code_ptr + 8; + val >>= 2; + assert(check_fit_tl(val, 17)); -#if TARGET_LONG_BITS == 32 - /* if equal, jump to label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) | - INSN_COND(COND_EQUAL)); - tcg_out_mov(s, r0, addr_reg); /* delay slot */ -#else - /* if not equal, jump to label3 */ - label3_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) | - INSN_COND(COND_EQUAL)); - tcg_out_mov(s, r0, addr_reg); /* delay slot */ - - tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW); - - /* if equal, jump to label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) | - INSN_COND(COND_EQUAL)); - tcg_out_nop(s); /* delay slot */ - - /* label3: */ - *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2); -#endif + tcg_out32(s, op | reassemble_17(val)); + } else { + /* We need to keep the offset unchanged for retranslation. 
*/ + uint32_t old_insn = *(uint32_t *)s->code_ptr; -#if TARGET_LONG_BITS == 32 - tcg_out_mov(s, TCG_REG_R26, addr_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R25, mem_index); -#else - tcg_out_mov(s, TCG_REG_R26, addr_reg); - tcg_out_mov(s, TCG_REG_R25, addr_reg2); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index); -#endif + tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL17F, label_index, 0); + tcg_out32(s, op | (old_insn & 0x1f1ffdu)); + } +} - tcg_out_call(s, qemu_ld_helpers[s_bits]); +static const uint8_t tcg_cond_to_cmp_cond[10] = +{ + [TCG_COND_EQ] = COND_EQ, + [TCG_COND_NE] = COND_EQ | COND_FALSE, + [TCG_COND_LT] = COND_LT, + [TCG_COND_GE] = COND_LT | COND_FALSE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_LE | COND_FALSE, + [TCG_COND_LTU] = COND_LTU, + [TCG_COND_GEU] = COND_LTU | COND_FALSE, + [TCG_COND_LEU] = COND_LEU, + [TCG_COND_GTU] = COND_LEU | COND_FALSE, +}; - switch(opc) { - case 0 | 4: - tcg_out_ext8s(s, data_reg, TCG_REG_RET0); - break; - case 1 | 4: - tcg_out_ext16s(s, data_reg, TCG_REG_RET0); - break; - case 0: - case 1: - case 2: - default: - tcg_out_mov(s, data_reg, TCG_REG_RET0); - break; - case 3: - tcg_abort(); - tcg_out_mov(s, data_reg, TCG_REG_RET0); - tcg_out_mov(s, data_reg2, TCG_REG_RET1); - break; +static void tcg_out_brcond(TCGContext *s, int cond, TCGArg c1, + TCGArg c2, int c2const, int label_index) +{ + TCGLabel *l = &s->labels[label_index]; + int op, pacond; + + /* Note that COMIB operates as if the immediate is the first + operand. We model brcond with the immediate in the second + to better match what targets are likely to give us. For + consistency, model COMB with reversed operands as well. */ + pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)]; + + if (c2const) { + op = (pacond & COND_FALSE ? INSN_COMIBF : INSN_COMIBT); + op |= INSN_IM5(c2); + } else { + op = (pacond & COND_FALSE ? INSN_COMBF : INSN_COMBT); + op |= INSN_R1(c2); } + op |= INSN_R2(c1); + op |= INSN_COND(pacond & 7); - /* jump to label2 */ - label2_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2); + if (l->has_value) { + tcg_target_long val = l->u.value; - /* label1: */ - *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2); + val -= (tcg_target_long)s->code_ptr + 8; + val >>= 2; + assert(check_fit_tl(val, 12)); - tcg_out_arithi(s, TCG_REG_R20, r1, - offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read), - ARITH_ADD); - tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW); - tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD); -#else - r0 = addr_reg; -#endif + /* ??? Assume that all branches to defined labels are backward. + Which means that if the nul bit is set, the delay slot is + executed if the branch is taken, and not executed in fallthru. */ + tcg_out32(s, op | reassemble_12(val)); + tcg_out_nop(s); + } else { + /* We need to keep the offset unchanged for retranslation. */ + uint32_t old_insn = *(uint32_t *)s->code_ptr; + + tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL12F, label_index, 0); + /* ??? Assume that all branches to undefined labels are forward. + Which means that if the nul bit is set, the delay slot is + not executed if the branch is taken, which is what we want. 
*/ + tcg_out32(s, op | 2 | (old_insn & 0x1ffdu)); + } +} -#ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; -#else - bswap = 1; -#endif - switch (opc) { - case 0: - tcg_out_ldst(s, data_reg, r0, 0, LDB); - break; - case 0 | 4: - tcg_out_ldst(s, data_reg, r0, 0, LDB); - tcg_out_ext8s(s, data_reg, data_reg); - break; - case 1: - tcg_out_ldst(s, data_reg, r0, 0, LDH); - if (bswap) - tcg_out_bswap16(s, data_reg, data_reg); - break; - case 1 | 4: - tcg_out_ldst(s, data_reg, r0, 0, LDH); - if (bswap) - tcg_out_bswap16(s, data_reg, data_reg); - tcg_out_ext16s(s, data_reg, data_reg); - break; - case 2: - tcg_out_ldst(s, data_reg, r0, 0, LDW); - if (bswap) - tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20); - break; - case 3: - tcg_abort(); - if (!bswap) { - tcg_out_ldst(s, data_reg, r0, 0, LDW); - tcg_out_ldst(s, data_reg2, r0, 4, LDW); - } else { - tcg_out_ldst(s, data_reg, r0, 4, LDW); - tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20); - tcg_out_ldst(s, data_reg2, r0, 0, LDW); - tcg_out_bswap32(s, data_reg2, data_reg2, TCG_REG_R20); - } - break; - default: - tcg_abort(); +static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const) +{ + int op, pacond; + + /* Note that COMICLR operates as if the immediate is the first + operand. We model setcond with the immediate in the second + to better match what targets are likely to give us. For + consistency, model COMCLR with reversed operands as well. */ + pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)]; + + if (c2const) { + op = INSN_COMICLR | INSN_R2(c1) | INSN_R1(ret) | INSN_IM11(c2); + } else { + op = INSN_COMCLR | INSN_R2(c1) | INSN_R1(c2) | INSN_T(ret); } + op |= INSN_COND(pacond & 7); + op |= pacond & COND_FALSE ? 1 << 12 : 0; -#if defined(CONFIG_SOFTMMU) - /* label2: */ - *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2); -#endif + tcg_out32(s, op); } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah, + TCGArg bl, int blconst, TCGArg bh, int bhconst, + int label_index) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap; -#if defined(CONFIG_SOFTMMU) - uint32_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint32_t *label3_ptr; -#endif - int addr_reg2; -#endif + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst); + tcg_out_brcond(s, cond, ah, bh, bhconst, label_index); + break; - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; /* suppress warning */ - addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif - mem_index = *args; + default: + tcg_out_brcond(s, cond, ah, bh, bhconst, label_index); + tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst); + tcg_out_brcond(s, tcg_unsigned_cond(cond), + al, bl, blconst, label_index); + break; + } +} + +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg ret, + TCGArg c1, TCGArg c2, int c2const) +{ + tcg_out_comclr(s, tcg_invert_cond(cond), ret, c1, c2, c2const); + tcg_out_movi(s, TCG_TYPE_I32, ret, 1); +} - s_bits = opc; +static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret, + TCGArg al, TCGArg ah, TCGArg bl, int blconst, + TCGArg bh, int bhconst) +{ + int scratch = TCG_REG_R20; + + if (ret != al && ret != ah + && (blconst || ret != bl) + && (bhconst || ret != bh)) { + scratch = ret; + } - r0 = TCG_REG_R26; - r1 = 
TCG_REG_R25; + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_NE: + tcg_out_setcond(s, cond, scratch, al, bl, blconst); + tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst); + tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE); + break; + + default: + tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst); + tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst); + tcg_out_movi(s, TCG_TYPE_I32, scratch, 0); + tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst); + tcg_out_movi(s, TCG_TYPE_I32, scratch, 1); + break; + } + + tcg_out_mov(s, TCG_TYPE_I32, ret, scratch); +} #if defined(CONFIG_SOFTMMU) - tcg_out_mov(s, r1, addr_reg); +#include "../../softmmu_defs.h" - tcg_out_mov(s, r0, addr_reg); +static void *qemu_ld_helpers[4] = { + __ldb_mmu, + __ldw_mmu, + __ldl_mmu, + __ldq_mmu, +}; + +static void *qemu_st_helpers[4] = { + __stb_mmu, + __stw_mmu, + __stl_mmu, + __stq_mmu, +}; + +/* Load and compare a TLB entry, and branch if TLB miss. OFFSET is set to + the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate + TLB for the memory index. The return value is the offset from ENV + contained in R1 afterward (to be used when loading ADDEND); if the + return value is 0, R1 is not used. */ - tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) | - INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); +static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, + int addrhi, int s_bits, int lab_miss, int offset) +{ + int ret; + + /* Extracting the index into the TLB. The "normal C operation" is + r1 = addr_reg >> TARGET_PAGE_BITS; + r1 &= CPU_TLB_SIZE - 1; + r1 <<= CPU_TLB_ENTRY_BITS; + What this does is extract CPU_TLB_BITS beginning at TARGET_PAGE_BITS + and place them at CPU_TLB_ENTRY_BITS. We can combine the first two + operations with an EXTRU. Unfortunately, the current value of + CPU_TLB_ENTRY_BITS is > 3, so we can't merge that shift with the + add that follows. */ + tcg_out_extr(s, r1, addrlo, TARGET_PAGE_BITS, CPU_TLB_BITS, 0); + tcg_out_shli(s, r1, r1, CPU_TLB_ENTRY_BITS); + tcg_out_arith(s, r1, r1, TCG_AREG0, INSN_ADDL); + + /* Make sure that both the addr_{read,write} and addend can be + read with a 14-bit offset from the same base register. */ + if (check_fit_tl(offset + CPU_TLB_SIZE, 14)) { + ret = 0; + } else { + ret = (offset + 0x400) & ~0x7ff; + offset = ret - offset; + tcg_out_addi2(s, TCG_REG_R1, r1, ret); + r1 = TCG_REG_R1; + } - tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1), - ARITH_AND); + /* Load the entry from the computed slot. */ + if (TARGET_LONG_BITS == 64) { + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R23, r1, offset); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset + 4); + } else { + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset); + } - tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, - ARITH_AND); + /* Compute the value that ought to appear in the TLB for a hit, namely, the page + of the address. We include the low N bits of the address to catch unaligned + accesses and force them onto the slow path. Do this computation after having + issued the load from the TLB slot to give the load time to complete. */ + tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD); - tcg_out_arithi(s, r1, r1, - offsetof(CPUState, tlb_table[mem_index][0].addr_write), - ARITH_ADD); + /* If not equal, jump to lab_miss. 
*/ + if (TARGET_LONG_BITS == 64) { + tcg_out_brcond2(s, TCG_COND_NE, TCG_REG_R20, TCG_REG_R23, + r0, 0, addrhi, 0, lab_miss); + } else { + tcg_out_brcond(s, TCG_COND_NE, TCG_REG_R20, r0, 0, lab_miss); + } - tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW); + return ret; +} +#endif -#if TARGET_LONG_BITS == 32 - /* if equal, jump to label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) | - INSN_COND(COND_EQUAL)); - tcg_out_mov(s, r0, addr_reg); /* delay slot */ +static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg, + int addr_reg, int addend_reg, int opc) +{ +#ifdef TARGET_WORDS_BIGENDIAN + const int bswap = 0; #else - /* if not equal, jump to label3 */ - label3_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) | - INSN_COND(COND_EQUAL)); - tcg_out_mov(s, r0, addr_reg); /* delay slot */ - - tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW); - - /* if equal, jump to label1 */ - label1_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) | - INSN_COND(COND_EQUAL)); - tcg_out_nop(s); /* delay slot */ - - /* label3: */ - *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2); + const int bswap = 1; #endif - tcg_out_mov(s, TCG_REG_R26, addr_reg); -#if TARGET_LONG_BITS == 64 - tcg_out_mov(s, TCG_REG_R25, addr_reg2); - if (opc == 3) { - tcg_abort(); - tcg_out_mov(s, TCG_REG_R24, data_reg); - tcg_out_mov(s, TCG_REG_R23, data_reg2); - /* TODO: push mem_index */ - tcg_abort(); - } else { - switch(opc) { - case 0: - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(8)); - break; - case 1: - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(16)); - break; - case 2: - tcg_out_mov(s, TCG_REG_R24, data_reg); - break; + switch (opc) { + case 0: + tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX); + break; + case 0 | 4: + tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX); + tcg_out_ext8s(s, datalo_reg, datalo_reg); + break; + case 1: + tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX); + if (bswap) { + tcg_out_bswap16(s, datalo_reg, datalo_reg, 0); } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index); - } -#else - if (opc == 3) { - tcg_abort(); - tcg_out_mov(s, TCG_REG_R25, data_reg); - tcg_out_mov(s, TCG_REG_R24, data_reg2); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index); - } else { - switch(opc) { - case 0: - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(8)); - break; - case 1: - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) | - INSN_SHDEP_P(31) | INSN_DEP_LEN(16)); - break; - case 2: - tcg_out_mov(s, TCG_REG_R25, data_reg); - break; + break; + case 1 | 4: + tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX); + if (bswap) { + tcg_out_bswap16(s, datalo_reg, datalo_reg, 1); + } else { + tcg_out_ext16s(s, datalo_reg, datalo_reg); } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index); + break; + case 2: + tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDWX); + if (bswap) { + tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20); + } + break; + case 3: + if (bswap) { + int t = datahi_reg; + datahi_reg = datalo_reg; + datalo_reg = t; + } + /* We can't access the low-part with a reg+reg addressing mode, + so perform the addition now and use reg_ofs addressing mode. 
*/ + if (addend_reg != TCG_REG_R0) { + tcg_out_arith(s, TCG_REG_R20, addr_reg, addend_reg, INSN_ADD); + addr_reg = TCG_REG_R20; + } + /* Make sure not to clobber the base register. */ + if (datahi_reg == addr_reg) { + tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW); + tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW); + } else { + tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW); + tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW); + } + if (bswap) { + tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20); + tcg_out_bswap32(s, datahi_reg, datahi_reg, TCG_REG_R20); + } + break; + default: + tcg_abort(); } -#endif - tcg_out_call(s, qemu_st_helpers[s_bits]); +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) +{ + int datalo_reg = *args++; + /* Note that datahi_reg is only used for 64-bit loads. */ + int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0); + int addrlo_reg = *args++; + +#if defined(CONFIG_SOFTMMU) + /* Note that addrhi_reg is only used for 64-bit guests. */ + int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); + int mem_index = *args; + int lab1, lab2, argreg, offset; + + lab1 = gen_new_label(); + lab2 = gen_new_label(); - /* jump to label2 */ - label2_ptr = (uint32_t *)s->code_ptr; - tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2); + offset = offsetof(CPUState, tlb_table[mem_index][0].addr_read); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, + opc & 3, lab1, offset); + /* TLB Hit. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + offsetof(CPUState, tlb_table[mem_index][0].addend) - offset); + tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc); + tcg_out_branch(s, lab2, 1); + + /* TLB Miss. */ /* label1: */ - *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2); + tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr); + + argreg = TCG_REG_R26; + tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + + tcg_out_call(s, qemu_ld_helpers[opc & 3]); + + switch (opc) { + case 0: + tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xff); + break; + case 0 | 4: + tcg_out_ext8s(s, datalo_reg, TCG_REG_RET0); + break; + case 1: + tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xffff); + break; + case 1 | 4: + tcg_out_ext16s(s, datalo_reg, TCG_REG_RET0); + break; + case 2: + case 2 | 4: + tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET0); + break; + case 3: + tcg_out_mov(s, TCG_TYPE_I32, datahi_reg, TCG_REG_RET0); + tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET1); + break; + default: + tcg_abort(); + } - tcg_out_arithi(s, TCG_REG_R20, r1, - offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write), - ARITH_ADD); - tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW); - tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD); + /* label2: */ + tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr); #else - r0 = addr_reg; + tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, + (GUEST_BASE ? 
TCG_GUEST_BASE_REG : TCG_REG_R0), opc); #endif +} +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg, + int addr_reg, int opc) +{ #ifdef TARGET_WORDS_BIGENDIAN - bswap = 0; + const int bswap = 0; #else - bswap = 1; + const int bswap = 1; #endif + switch (opc) { case 0: - tcg_out_ldst(s, data_reg, r0, 0, STB); + tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STB); break; case 1: if (bswap) { - tcg_out_bswap16(s, TCG_REG_R20, data_reg); - data_reg = TCG_REG_R20; + tcg_out_bswap16(s, TCG_REG_R20, datalo_reg, 0); + datalo_reg = TCG_REG_R20; } - tcg_out_ldst(s, data_reg, r0, 0, STH); + tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STH); break; case 2: if (bswap) { - tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20); - data_reg = TCG_REG_R20; + tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20); + datalo_reg = TCG_REG_R20; } - tcg_out_ldst(s, data_reg, r0, 0, STW); + tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STW); break; case 3: + if (bswap) { + tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20); + tcg_out_bswap32(s, TCG_REG_R23, datahi_reg, TCG_REG_R23); + datahi_reg = TCG_REG_R20; + datalo_reg = TCG_REG_R23; + } + tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_STW); + tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_STW); + break; + default: tcg_abort(); - if (!bswap) { - tcg_out_ldst(s, data_reg, r0, 0, STW); - tcg_out_ldst(s, data_reg2, r0, 4, STW); + } + +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) +{ + int datalo_reg = *args++; + /* Note that datahi_reg is only used for 64-bit loads. */ + int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0); + int addrlo_reg = *args++; + +#if defined(CONFIG_SOFTMMU) + /* Note that addrhi_reg is only used for 64-bit guests. */ + int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); + int mem_index = *args; + int lab1, lab2, argreg, offset; + + lab1 = gen_new_label(); + lab2 = gen_new_label(); + + offset = offsetof(CPUState, tlb_table[mem_index][0].addr_write); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, + opc, lab1, offset); + + /* TLB Hit. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + offsetof(CPUState, tlb_table[mem_index][0].addend) - offset); + + /* There are no indexed stores, so we must do this addition explitly. + Careful to avoid R20, which is used for the bswaps to follow. */ + tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_REG_R20, INSN_ADDL); + tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, TCG_REG_R31, opc); + tcg_out_branch(s, lab2, 1); + + /* TLB Miss. */ + /* label1: */ + tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr); + + argreg = TCG_REG_R26; + tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + } + + switch(opc) { + case 0: + tcg_out_andi(s, argreg--, datalo_reg, 0xff); + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + break; + case 1: + tcg_out_andi(s, argreg--, datalo_reg, 0xffff); + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg); + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + break; + case 3: + /* Because of the alignment required by the 64-bit data argument, + we will always use R23/R24. Also, we will always run out of + argument registers for storing mem_index, so that will have + to go on the stack. 
*/ + if (mem_index == 0) { + argreg = TCG_REG_R0; } else { - tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20); - tcg_out_ldst(s, TCG_REG_R20, r0, 4, STW); - tcg_out_bswap32(s, TCG_REG_R20, data_reg2, TCG_REG_R20); - tcg_out_ldst(s, TCG_REG_R20, r0, 0, STW); + argreg = TCG_REG_R20; + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); } + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg); + tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_SP, + TCG_TARGET_CALL_STACK_OFFSET - 4); break; default: tcg_abort(); } -#if defined(CONFIG_SOFTMMU) + tcg_out_call(s, qemu_st_helpers[opc]); + /* label2: */ - *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2); + tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr); +#else + /* There are no indexed stores, so if GUEST_BASE is set we must do the add + explicitly. Careful to avoid R20, which is used for the bswaps to follow. */ + if (GUEST_BASE != 0) { + tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL); + addrlo_reg = TCG_REG_R31; + } + tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc); #endif } -static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, - const int *const_args) +static void tcg_out_exit_tb(TCGContext *s, TCGArg arg) +{ + if (!check_fit_tl(arg, 14)) { + uint32_t hi, lo; + hi = arg & ~0x7ff; + lo = arg & 0x7ff; + if (lo) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, hi); + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18)); + tcg_out_addi(s, TCG_REG_RET0, lo); + return; + } + arg = hi; + } + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18)); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, arg); +} + +static void tcg_out_goto_tb(TCGContext *s, TCGArg arg) { - int c; + if (s->tb_jmp_offset) { + /* direct jump method */ + fprintf(stderr, "goto_tb direct\n"); + tcg_abort(); + } else { + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, TCG_REG_R0, + (tcg_target_long)(s->tb_next + arg)); + tcg_out32(s, INSN_BV_N | INSN_R2(TCG_REG_R20)); + } + s->tb_next_offset[arg] = s->code_ptr - s->code_buf; +} +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args) +{ switch (opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, args[0]); - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R18)); + tcg_out_exit_tb(s, args[0]); break; case INDEX_op_goto_tb: - if (s->tb_jmp_offset) { - /* direct jump method */ - fprintf(stderr, "goto_tb direct\n"); - tcg_abort(); - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, args[0]); - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20)); - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; - } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_R20, - (tcg_target_long)(s->tb_next + args[0])); - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20)); - } - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + tcg_out_goto_tb(s, args[0]); break; + case INDEX_op_call: - tcg_out32(s, BLE_SR4 | INSN_R2(args[0])); - tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31); + if (const_args[0]) { + tcg_out_call(s, (void *)args[0]); + } else { + /* ??? FIXME: the value in the register in args[0] is almost + certainly a procedure descriptor, not a code address. We + probably need to use the millicode $$dyncall routine. 
*/ + tcg_abort(); + } break; + case INDEX_op_jmp: fprintf(stderr, "unimplemented jmp\n"); tcg_abort(); break; + case INDEX_op_br: - fprintf(stderr, "unimplemented br\n"); - tcg_abort(); + tcg_out_branch(s, args[0], 1); break; + case INDEX_op_movi_i32: tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]); break; case INDEX_op_ld8u_i32: - tcg_out_ldst(s, args[0], args[1], args[2], LDB); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB); break; case INDEX_op_ld8s_i32: - tcg_out_ldst(s, args[0], args[1], args[2], LDB); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB); tcg_out_ext8s(s, args[0], args[0]); break; case INDEX_op_ld16u_i32: - tcg_out_ldst(s, args[0], args[1], args[2], LDH); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH); break; case INDEX_op_ld16s_i32: - tcg_out_ldst(s, args[0], args[1], args[2], LDH); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH); tcg_out_ext16s(s, args[0], args[0]); break; case INDEX_op_ld_i32: - tcg_out_ldst(s, args[0], args[1], args[2], LDW); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDW); break; case INDEX_op_st8_i32: - tcg_out_ldst(s, args[0], args[1], args[2], STB); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STB); break; case INDEX_op_st16_i32: - tcg_out_ldst(s, args[0], args[1], args[2], STH); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STH); break; case INDEX_op_st_i32: - tcg_out_ldst(s, args[0], args[1], args[2], STW); + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STW); + break; + + case INDEX_op_add_i32: + if (const_args[2]) { + tcg_out_addi2(s, args[0], args[1], args[2]); + } else { + tcg_out_arith(s, args[0], args[1], args[2], INSN_ADDL); + } break; case INDEX_op_sub_i32: - c = ARITH_SUB; - goto gen_arith; + if (const_args[1]) { + if (const_args[2]) { + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1] - args[2]); + } else { + /* Recall that SUBI is a reversed subtract. 
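+   That is, SUBI computes dest = immediate - source rather than
+   source - immediate, which is exactly the form a constant first
+   operand needs.  The same reversal is what implements not_i32
+   further down: -1 - src is ~src in two's complement.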
*/ + tcg_out_arithi(s, args[0], args[2], args[1], INSN_SUBI); + } + } else if (const_args[2]) { + tcg_out_addi2(s, args[0], args[1], -args[2]); + } else { + tcg_out_arith(s, args[0], args[1], args[2], INSN_SUB); + } + break; + case INDEX_op_and_i32: - c = ARITH_AND; - goto gen_arith; + if (const_args[2]) { + tcg_out_andi(s, args[0], args[1], args[2]); + } else { + tcg_out_arith(s, args[0], args[1], args[2], INSN_AND); + } + break; + case INDEX_op_or_i32: - c = ARITH_OR; - goto gen_arith; + if (const_args[2]) { + tcg_out_ori(s, args[0], args[1], args[2]); + } else { + tcg_out_arith(s, args[0], args[1], args[2], INSN_OR); + } + break; + case INDEX_op_xor_i32: - c = ARITH_XOR; - goto gen_arith; - case INDEX_op_add_i32: - c = ARITH_ADD; - goto gen_arith; + tcg_out_arith(s, args[0], args[1], args[2], INSN_XOR); + break; + + case INDEX_op_andc_i32: + if (const_args[2]) { + tcg_out_andi(s, args[0], args[1], ~args[2]); + } else { + tcg_out_arith(s, args[0], args[1], args[2], INSN_ANDCM); + } + break; case INDEX_op_shl_i32: - tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) | - lowsignext(0x1f, 0, 11)); - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20)); - tcg_out32(s, ZVDEP | INSN_R2(args[0]) | INSN_R1(args[1]) | - INSN_DEP_LEN(32)); + if (const_args[2]) { + tcg_out_shli(s, args[0], args[1], args[2]); + } else { + tcg_out_shl(s, args[0], args[1], args[2]); + } break; + case INDEX_op_shr_i32: - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(args[2])); - tcg_out32(s, VSHD | INSN_T(args[0]) | INSN_R1(TCG_REG_R0) | - INSN_R2(args[1])); + if (const_args[2]) { + tcg_out_shri(s, args[0], args[1], args[2]); + } else { + tcg_out_shr(s, args[0], args[1], args[2]); + } break; + case INDEX_op_sar_i32: - tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) | - lowsignext(0x1f, 0, 11)); - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20)); - tcg_out32(s, VEXTRS | INSN_R1(args[0]) | INSN_R2(args[1]) | - INSN_DEP_LEN(32)); + if (const_args[2]) { + tcg_out_sari(s, args[0], args[1], args[2]); + } else { + tcg_out_sar(s, args[0], args[1], args[2]); + } + break; + + case INDEX_op_rotl_i32: + if (const_args[2]) { + tcg_out_rotli(s, args[0], args[1], args[2]); + } else { + tcg_out_rotl(s, args[0], args[1], args[2]); + } + break; + + case INDEX_op_rotr_i32: + if (const_args[2]) { + tcg_out_rotri(s, args[0], args[1], args[2]); + } else { + tcg_out_rotr(s, args[0], args[1], args[2]); + } break; case INDEX_op_mul_i32: - fprintf(stderr, "unimplemented mul\n"); - tcg_abort(); + tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]); break; case INDEX_op_mulu2_i32: - fprintf(stderr, "unimplemented mulu2\n"); - tcg_abort(); + tcg_out_xmpyu(s, args[0], args[1], args[2], args[3]); break; - case INDEX_op_div2_i32: - fprintf(stderr, "unimplemented div2\n"); - tcg_abort(); + + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, args[0], args[1], 0); break; - case INDEX_op_divu2_i32: - fprintf(stderr, "unimplemented divu2\n"); - tcg_abort(); + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, args[0], args[1], TCG_REG_R20); + break; + + case INDEX_op_not_i32: + tcg_out_arithi(s, args[0], args[1], -1, INSN_SUBI); + break; + case INDEX_op_ext8s_i32: + tcg_out_ext8s(s, args[0], args[1]); + break; + case INDEX_op_ext16s_i32: + tcg_out_ext16s(s, args[0], args[1]); break; case INDEX_op_brcond_i32: - fprintf(stderr, "unimplemented brcond\n"); - tcg_abort(); + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]); + break; + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args[4], args[0], args[1], + 
args[2], const_args[2], + args[3], const_args[3], args[5]); + break; + + case INDEX_op_setcond_i32: + tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args[5], args[0], args[1], args[2], + args[3], const_args[3], args[4], const_args[4]); + break; + + case INDEX_op_add2_i32: + tcg_out_add2(s, args[0], args[1], args[2], args[3], + args[4], args[5], const_args[4]); + break; + + case INDEX_op_sub2_i32: + tcg_out_sub2(s, args[0], args[1], args[2], args[3], + args[4], args[5], const_args[2], const_args[4]); break; case INDEX_op_qemu_ld8u: @@ -863,9 +1470,12 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_qemu_ld16s: tcg_out_qemu_ld(s, args, 1 | 4); break; - case INDEX_op_qemu_ld32u: + case INDEX_op_qemu_ld32: tcg_out_qemu_ld(s, args, 2); break; + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, 3); + break; case INDEX_op_qemu_st8: tcg_out_qemu_st(s, args, 0); @@ -876,87 +1486,186 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_qemu_st32: tcg_out_qemu_st(s, args, 2); break; + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, 3); + break; default: fprintf(stderr, "unknown opcode 0x%x\n", opc); tcg_abort(); } - return; - -gen_arith: - tcg_out_arith(s, args[0], args[1], args[2], c); } static const TCGTargetOpDef hppa_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, - { INDEX_op_call, { "r" } }, + { INDEX_op_call, { "ri" } }, { INDEX_op_jmp, { "r" } }, { INDEX_op_br, { } }, { INDEX_op_mov_i32, { "r", "r" } }, { INDEX_op_movi_i32, { "r" } }, + { INDEX_op_ld8u_i32, { "r", "r" } }, { INDEX_op_ld8s_i32, { "r", "r" } }, { INDEX_op_ld16u_i32, { "r", "r" } }, { INDEX_op_ld16s_i32, { "r", "r" } }, { INDEX_op_ld_i32, { "r", "r" } }, - { INDEX_op_st8_i32, { "r", "r" } }, - { INDEX_op_st16_i32, { "r", "r" } }, - { INDEX_op_st_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "rZ", "r" } }, + { INDEX_op_st16_i32, { "rZ", "r" } }, + { INDEX_op_st_i32, { "rZ", "r" } }, - { INDEX_op_add_i32, { "r", "r", "r" } }, - { INDEX_op_sub_i32, { "r", "r", "r" } }, - { INDEX_op_and_i32, { "r", "r", "r" } }, - { INDEX_op_or_i32, { "r", "r", "r" } }, - { INDEX_op_xor_i32, { "r", "r", "r" } }, + { INDEX_op_add_i32, { "r", "rZ", "ri" } }, + { INDEX_op_sub_i32, { "r", "rI", "ri" } }, + { INDEX_op_and_i32, { "r", "rZ", "rM" } }, + { INDEX_op_or_i32, { "r", "rZ", "rO" } }, + { INDEX_op_xor_i32, { "r", "rZ", "rZ" } }, + /* Note that the second argument will be inverted, which means + we want a constant whose inversion matches M, and that O = ~M. + See the implementation of and_mask_p. 
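+   (The constraint letters here map onto the TCG_CT_CONST_* flags
+   declared in tcg-target.h: Z accepts the constant 0 via the hardwired
+   %r0, I a signed 11-bit immediate, J a signed 5-bit immediate, K a
+   constant whose negation fits in 11 bits, and M/O constants encodable
+   as AND/OR immediates.  and_mask_p itself is not in this hunk; a
+   plausible sketch of the shape it accepts, a single contiguous run of
+   ones, illustrative only:
+
+       static inline int and_mask_p(tcg_target_ulong mask)
+       {
+           /* Adding the lowest set bit must clear the whole run.  */
+           tcg_target_ulong lsb = mask & -mask;
+           return mask != 0 && ((mask + lsb) & mask) == 0;
+       }
+   )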
*/ + { INDEX_op_andc_i32, { "r", "rZ", "rO" } }, - { INDEX_op_shl_i32, { "r", "r", "r" } }, - { INDEX_op_shr_i32, { "r", "r", "r" } }, - { INDEX_op_sar_i32, { "r", "r", "r" } }, + { INDEX_op_mul_i32, { "r", "r", "r" } }, + { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, - { INDEX_op_brcond_i32, { "r", "r" } }, + { INDEX_op_shl_i32, { "r", "r", "ri" } }, + { INDEX_op_shr_i32, { "r", "r", "ri" } }, + { INDEX_op_sar_i32, { "r", "r", "ri" } }, + { INDEX_op_rotl_i32, { "r", "r", "ri" } }, + { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_not_i32, { "r", "r" } }, + + { INDEX_op_ext8s_i32, { "r", "r" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + + { INDEX_op_brcond_i32, { "rZ", "rJ" } }, + { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } }, + + { INDEX_op_setcond_i32, { "r", "rZ", "rI" } }, + { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } }, + { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } }, #if TARGET_LONG_BITS == 32 { INDEX_op_qemu_ld8u, { "r", "L" } }, { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld64, { "r", "r", "L" } }, - { INDEX_op_qemu_st8, { "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L", "L" } }, + { INDEX_op_qemu_st8, { "LZ", "L" } }, + { INDEX_op_qemu_st16, { "LZ", "L" } }, + { INDEX_op_qemu_st32, { "LZ", "L" } }, + { INDEX_op_qemu_st64, { "LZ", "LZ", "L" } }, #else { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, - { INDEX_op_qemu_st8, { "L", "L", "L" } }, - { INDEX_op_qemu_st16, { "L", "L", "L" } }, - { INDEX_op_qemu_st32, { "L", "L", "L" } }, - { INDEX_op_qemu_st64, { "L", "L", "L", "L" } }, + { INDEX_op_qemu_st8, { "LZ", "L", "L" } }, + { INDEX_op_qemu_st16, { "LZ", "L", "L" } }, + { INDEX_op_qemu_st32, { "LZ", "L", "L" } }, + { INDEX_op_qemu_st64, { "LZ", "LZ", "L", "L" } }, #endif { -1 }, }; -void tcg_target_init(TCGContext *s) +static int tcg_target_callee_save_regs[] = { + /* R2, the return address register, is saved specially + in the caller's frame. */ + /* R3, the frame pointer, is not currently modified. */ + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R16, + /* R17 is the global env, so no need to save. */ + TCG_REG_R18 +}; + +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int frame_size, i; + + /* Allocate space for the fixed frame marker. */ + frame_size = -TCG_TARGET_CALL_STACK_OFFSET; + frame_size += TCG_TARGET_STATIC_CALL_ARGS_SIZE; + + /* Allocate space for the saved registers. */ + frame_size += ARRAY_SIZE(tcg_target_callee_save_regs) * 4; + + /* Align the allocated space. */ + frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1) + & -TCG_TARGET_STACK_ALIGN); + + /* The return address is stored in the caller's frame. 
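+   The hppa stack grows upward and %sp points just beyond the frame, so
+   %rp's standard slot in the frame marker laid down by our caller is at
+   SP - 20.  The epilogue below reloads it from -frame_size - 20
+   because the STWM that follows advances %sp by frame_size.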
*/ + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -20); + + /* Allocate stack frame, saving the first register at the same time. */ + tcg_out_ldst(s, tcg_target_callee_save_regs[0], + TCG_REG_SP, frame_size, INSN_STWM); + + /* Save all callee saved registers. */ + for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_st(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i], + TCG_REG_SP, -frame_size + i * 4); + } + +#ifdef CONFIG_USE_GUEST_BASE + if (GUEST_BASE != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + /* Jump to TB, and adjust R18 to be the return address. */ + tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R26)); + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R18, TCG_REG_R31); + + /* Restore callee saved registers. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -frame_size - 20); + for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i], + TCG_REG_SP, -frame_size + i * 4); + } + + /* Deallocate stack frame and return. */ + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_RP)); + tcg_out_ldst(s, tcg_target_callee_save_regs[0], + TCG_REG_SP, -frame_size, INSN_LDWM); +} + +static void tcg_target_init(TCGContext *s) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_R20) | - (1 << TCG_REG_R21) | - (1 << TCG_REG_R22) | - (1 << TCG_REG_R23) | - (1 << TCG_REG_R24) | - (1 << TCG_REG_R25) | - (1 << TCG_REG_R26)); + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET0); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET1); tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* hardwired to zero */ diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 7ab6f0c..a5cc440 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -69,135 +69,51 @@ enum { TCG_REG_R31, }; +#define TCG_CT_CONST_0 0x0100 +#define TCG_CT_CONST_S5 0x0200 +#define TCG_CT_CONST_S11 0x0400 +#define TCG_CT_CONST_MS11 0x0800 +#define TCG_CT_CONST_AND 0x1000 +#define TCG_CT_CONST_OR 0x2000 + /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_SP -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_STACK_ALIGN 64 +#define TCG_TARGET_CALL_STACK_OFFSET -48 +#define TCG_TARGET_STATIC_CALL_ARGS_SIZE 8*4 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_STACK_GROWSUP /* optional instructions */ -//#define TCG_TARGET_HAS_ext8s_i32 -//#define TCG_TARGET_HAS_ext16s_i32 -//#define TCG_TARGET_HAS_bswap16_i32 -//#define TCG_TARGET_HAS_bswap32_i32 +// #define TCG_TARGET_HAS_div_i32 +#define TCG_TARGET_HAS_rot_i32 +#define TCG_TARGET_HAS_ext8s_i32 +#define TCG_TARGET_HAS_ext16s_i32 +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 +#define TCG_TARGET_HAS_not_i32 +#define TCG_TARGET_HAS_andc_i32 +// #define TCG_TARGET_HAS_orc_i32 + +/* optional 
instructions automatically implemented */ +#undef TCG_TARGET_HAS_neg_i32 /* sub rd, 0, rs */ +#undef TCG_TARGET_HAS_ext8u_i32 /* and rd, rs, 0xff */ +#undef TCG_TARGET_HAS_ext16u_i32 /* and rd, rs, 0xffff */ + +#define TCG_TARGET_HAS_GUEST_BASE /* Note: must be synced with dyngen-exec.h */ #define TCG_AREG0 TCG_REG_R17 -#define TCG_AREG1 TCG_REG_R14 -#define TCG_AREG2 TCG_REG_R15 static inline void flush_icache_range(unsigned long start, unsigned long stop) { start &= ~31; - while (start <= stop) - { - asm volatile ("fdc 0(%0)\n" - "sync\n" - "fic 0(%%sr4, %0)\n" - "sync\n" + while (start <= stop) { + asm volatile ("fdc 0(%0)\n\t" + "sync\n\t" + "fic 0(%%sr4, %0)\n\t" + "sync" : : "r"(start) : "memory"); start += 32; } } - -/* supplied by libgcc */ -extern void *__canonicalize_funcptr_for_compare(void *); - -/* Field selection types defined by hppa */ -#define rnd(x) (((x)+0x1000)&~0x1fff) -/* lsel: select left 21 bits */ -#define lsel(v,a) (((v)+(a))>>11) -/* rsel: select right 11 bits */ -#define rsel(v,a) (((v)+(a))&0x7ff) -/* lrsel with rounding of addend to nearest 8k */ -#define lrsel(v,a) (((v)+rnd(a))>>11) -/* rrsel with rounding of addend to nearest 8k */ -#define rrsel(v,a) ((((v)+rnd(a))&0x7ff)+((a)-rnd(a))) - -#define mask(x,sz) ((x) & ~((1<<(sz))-1)) - -static inline int reassemble_12(int as12) -{ - return (((as12 & 0x800) >> 11) | - ((as12 & 0x400) >> 8) | - ((as12 & 0x3ff) << 3)); -} - -static inline int reassemble_14(int as14) -{ - return (((as14 & 0x1fff) << 1) | - ((as14 & 0x2000) >> 13)); -} - -static inline int reassemble_17(int as17) -{ - return (((as17 & 0x10000) >> 16) | - ((as17 & 0x0f800) << 5) | - ((as17 & 0x00400) >> 8) | - ((as17 & 0x003ff) << 3)); -} - -static inline int reassemble_21(int as21) -{ - return (((as21 & 0x100000) >> 20) | - ((as21 & 0x0ffe00) >> 8) | - ((as21 & 0x000180) << 7) | - ((as21 & 0x00007c) << 14) | - ((as21 & 0x000003) << 12)); -} - -static inline void hppa_patch21l(uint32_t *insn, int val, int addend) -{ - val = lrsel(val, addend); - *insn = mask(*insn, 21) | reassemble_21(val); -} - -static inline void hppa_patch14r(uint32_t *insn, int val, int addend) -{ - val = rrsel(val, addend); - *insn = mask(*insn, 14) | reassemble_14(val); -} - -static inline void hppa_patch17r(uint32_t *insn, int val, int addend) -{ - val = rrsel(val, addend); - *insn = (*insn & ~0x1f1ffd) | reassemble_17(val); -} - - -static inline void hppa_patch21l_dprel(uint32_t *insn, int val, int addend) -{ - register unsigned int dp asm("r27"); - hppa_patch21l(insn, val - dp, addend); -} - -static inline void hppa_patch14r_dprel(uint32_t *insn, int val, int addend) -{ - register unsigned int dp asm("r27"); - hppa_patch14r(insn, val - dp, addend); -} - -static inline void hppa_patch17f(uint32_t *insn, int val, int addend) -{ - int dot = (int)insn & ~0x3; - int v = ((val + addend) - dot - 8) / 4; - if (v > (1 << 16) || v < -(1 << 16)) { - printf("cannot fit branch to offset %d [%08x->%08x]\n", v, dot, val); - abort(); - } - *insn = (*insn & ~0x1f1ffd) | reassemble_17(v); -} - -static inline void hppa_load_imm21l(uint32_t *insn, int val, int addend) -{ - /* Transform addil L'sym(%dp) to ldil L'val, %r1 */ - *insn = 0x20200000 | reassemble_21(lrsel(val, 0)); -} - -static inline void hppa_load_imm14r(uint32_t *insn, int val, int addend) -{ - /* Transform ldw R'sym(%r1), %rN to ldo R'sym(%r1), %rN */ - hppa_patch14r(insn, val, addend); - /* HACK */ - if (addend == 0) - *insn = (*insn & ~0xfc000000) | (0x0d << 26); -} diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c 
index 972b102..09a5c55 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -24,18 +24,33 @@ #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%eax", - "%ecx", - "%edx", - "%ebx", - "%esp", - "%ebp", - "%esi", - "%edi", +#if TCG_TARGET_REG_BITS == 64 + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +#else + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", +#endif }; #endif static const int tcg_target_reg_alloc_order[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RCX, + TCG_REG_RDX, + TCG_REG_RSI, + TCG_REG_RDI, + TCG_REG_RAX, +#else TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX, @@ -43,23 +58,49 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_ESI, TCG_REG_EDI, TCG_REG_EBP, +#endif +}; + +static const int tcg_target_call_iarg_regs[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RDI, + TCG_REG_RSI, + TCG_REG_RDX, + TCG_REG_RCX, + TCG_REG_R8, + TCG_REG_R9, +#else + TCG_REG_EAX, + TCG_REG_EDX, + TCG_REG_ECX +#endif }; -static const int tcg_target_call_iarg_regs[3] = { TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX }; -static const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX }; +static const int tcg_target_call_oarg_regs[2] = { + TCG_REG_EAX, + TCG_REG_EDX +}; static uint8_t *tb_ret_addr; -static void patch_reloc(uint8_t *code_ptr, int type, +static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { value += addend; switch(type) { - case R_386_32: + case R_386_PC32: + value -= (uintptr_t)code_ptr; + if (value != (int32_t)value) { + tcg_abort(); + } *(uint32_t *)code_ptr = value; break; - case R_386_PC32: - *(uint32_t *)code_ptr = value - (long)code_ptr; + case R_386_PC8: + value -= (uintptr_t)code_ptr; + if (value != (int8_t)value) { + tcg_abort(); + } + *(uint8_t *)code_ptr = value; break; default: tcg_abort(); @@ -69,6 +110,10 @@ static void patch_reloc(uint8_t *code_ptr, int type, /* maximum number of register used for input function arguments */ static inline int tcg_target_get_call_iarg_regs_count(int flags) { + if (TCG_TARGET_REG_BITS == 64) { + return 6; + } + flags &= TCG_CALL_TYPE_MASK; switch(flags) { case TCG_CALL_TYPE_STD: @@ -115,20 +160,42 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) break; case 'q': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xf); + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xf); + } break; case 'r': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xff); + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } break; /* qemu_ld/st address constraint */ case 'L': ct->ct |= TCG_CT_REG; - tcg_regset_set32(ct->u.regs, 0, 0xff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); + } break; + + case 'e': + ct->ct |= TCG_CT_CONST_S32; + break; + 
case 'Z': + ct->ct |= TCG_CT_CONST_U32; + break; + default: return -1; } @@ -141,14 +208,83 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) static inline int tcg_target_const_match(tcg_target_long val, const TCGArgConstraint *arg_ct) { - int ct; - ct = arg_ct->ct; - if (ct & TCG_CT_CONST) + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { return 1; - else - return 0; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } + return 0; } +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x) ((x) & 7) +#else +# define LOWREGMASK(x) (x) +#endif + +#define P_EXT 0x100 /* 0x0f opcode prefix */ +#define P_DATA16 0x200 /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32 0x400 /* 0x67 opcode prefix */ +# define P_REXW 0x800 /* Set REX.W = 1 */ +# define P_REXB_R 0x1000 /* REG field as byte register */ +# define P_REXB_RM 0x2000 /* R/M field as byte register */ +#else +# define P_ADDR32 0 +# define P_REXW 0 +# define P_REXB_R 0 +# define P_REXB_RM 0 +#endif + +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_POP_r32 (0x58) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_TESTL (0x85) +#define OPC_XCHG_ax_r32 (0x90) + +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) + +/* Group 1 opcode extensions for 0x80-0x83. + These are also used as modifiers for OPC_ARITH. */ #define ARITH_ADD 0 #define ARITH_OR 1 #define ARITH_ADC 2 @@ -158,12 +294,28 @@ static inline int tcg_target_const_match(tcg_target_long val, #define ARITH_XOR 6 #define ARITH_CMP 7 +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ #define SHIFT_ROL 0 #define SHIFT_ROR 1 #define SHIFT_SHL 4 #define SHIFT_SHR 5 #define SHIFT_SAR 7 +/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_NOT 2 +#define EXT3_NEG 3 +#define EXT3_MUL 4 +#define EXT3_IMUL 5 +#define EXT3_DIV 6 +#define EXT3_IDIV 7 + +/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ +#define EXT5_INC_Ev 0 +#define EXT5_DEC_Ev 1 +#define EXT5_CALLN_Ev 2 +#define EXT5_JMPN_Ev 4 + +/* Condition codes to be added to OPC_JCC_{long,short}. 
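+   One cc value drives every conditional form: 0x70+cc for the short
+   rel8 jump, 0x0f 0x80+cc for the long rel32 jump, and 0x0f 0x90+cc
+   for SETcc.  For example, emitting both encodings of jne (cc = 0x5):
+
+       tcg_out8(s, OPC_JCC_short + JCC_JNE);             /* 75 rel8 */
+       tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);  /* 0f 85 rel32 */
+
+   which is exactly what tcg_out_jxx below does.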
*/ #define JCC_JMP (-1) #define JCC_JO 0x0 #define JCC_JNO 0x1 @@ -182,8 +334,6 @@ static inline int tcg_target_const_match(tcg_target_long val, #define JCC_JLE 0xe #define JCC_JG 0xf -#define P_EXT 0x100 /* 0x0f opcode prefix */ - static const uint8_t tcg_cond_to_jcc[10] = { [TCG_COND_EQ] = JCC_JE, [TCG_COND_NE] = JCC_JNE, @@ -197,228 +347,616 @@ static const uint8_t tcg_cond_to_jcc[10] = { [TCG_COND_GTU] = JCC_JA, }; -static inline void tcg_out_opc(TCGContext *s, int opc) +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ + int rex; + + if (opc & P_DATA16) { + /* We should never be asking for both 16 and 64-bit operation. */ + assert((opc & P_REXW) == 0); + tcg_out8(s, 0x66); + } + if (opc & P_ADDR32) { + tcg_out8(s, 0x67); + } + + rex = 0; + rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + + /* P_REXB_{R,RM} indicates that the given register is the low byte. + For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, + as otherwise the encoding indicates %[abcd]h. Note that the values + that are ORed in merely indicate that the REX byte must be present; + those bits get discarded in output. */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + + if (rex) { + tcg_out8(s, (uint8_t)(rex | 0x40)); + } + + if (opc & P_EXT) { + tcg_out8(s, 0x0f); + } + tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) { - if (opc & P_EXT) + if (opc & P_DATA16) { + tcg_out8(s, 0x66); + } + if (opc & P_EXT) { tcg_out8(s, 0x0f); + } tcg_out8(s, opc); } +/* Discard the register arguments to tcg_out_opc early, so as not to penalize + the 32-bit compilation paths. This method works with all versions of gcc, + whereas relying on optimization may not be able to exclude them. */ +#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) +#endif -static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) { - tcg_out_opc(s, opc); - tcg_out8(s, 0xc0 | (r << 3) | rm); + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); } -/* rm == -1 means no register index */ -static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, - int32_t offset) +/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. + We handle either RM and INDEX missing with a negative value. In 64-bit + mode for absolute addresses, ~RM is the size of the immediate operand + that will follow the instruction. */ + +static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, + int index, int shift, + tcg_target_long offset) { - tcg_out_opc(s, opc); - if (rm == -1) { - tcg_out8(s, 0x05 | (r << 3)); - tcg_out32(s, offset); - } else if (offset == 0 && rm != TCG_REG_EBP) { - if (rm == TCG_REG_ESP) { - tcg_out8(s, 0x04 | (r << 3)); - tcg_out8(s, 0x24); - } else { - tcg_out8(s, 0x00 | (r << 3) | rm); - } - } else if ((int8_t)offset == offset) { - if (rm == TCG_REG_ESP) { - tcg_out8(s, 0x44 | (r << 3)); - tcg_out8(s, 0x24); + int mod, len; + + if (index < 0 && rm < 0) { + if (TCG_TARGET_REG_BITS == 64) { + /* Try for a rip-relative addressing mode. This has replaced + the 32-bit-mode absolute addressing encoding. 
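+   RIP-relative displacements are measured from the end of the whole
+   instruction, immediate included, which is why callers encode the
+   immediate size as ~RM: a compare with a 4-byte immediate would pass
+   rm = ~4, i.e. -5, so the end address can be estimated before any
+   byte is emitted.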
*/ + tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm; + tcg_target_long disp = offset - pc; + if (disp == (int32_t)disp) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 5); + tcg_out32(s, disp); + return; + } + + /* Try for an absolute address encoding. This requires the + use of the MODRM+SIB encoding and is therefore larger than + rip-relative addressing. */ + if (offset == (int32_t)offset) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (4 << 3) | 5); + tcg_out32(s, offset); + return; + } + + /* ??? The memory isn't directly addressable. */ + tcg_abort(); } else { - tcg_out8(s, 0x40 | (r << 3) | rm); + /* Absolute address. */ + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (r << 3) | 5); + tcg_out32(s, offset); + return; } - tcg_out8(s, offset); + } + + /* Find the length of the immediate addend. Note that the encoding + that would be used for (%ebp) indicates absolute addressing. */ + if (rm < 0) { + mod = 0, len = 4, rm = 5; + } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { + mod = 0, len = 0; + } else if (offset == (int8_t)offset) { + mod = 0x40, len = 1; + } else { + mod = 0x80, len = 4; + } + + /* Use a single byte MODRM format if possible. Note that the encoding + that would be used for %esp is the escape to the two byte form. */ + if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { + /* Single byte MODRM format. */ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); } else { - if (rm == TCG_REG_ESP) { - tcg_out8(s, 0x84 | (r << 3)); - tcg_out8(s, 0x24); + /* Two byte MODRM+SIB format. */ + + /* Note that the encoding that would place %esp into the index + field indicates no index register. In 64-bit mode, the REX.X + bit counts, so %r12 can be used as the index. */ + if (index < 0) { + index = 4; } else { - tcg_out8(s, 0x80 | (r << 3) | rm); + assert(index != TCG_REG_ESP); } + + tcg_out_opc(s, opc, r, rm, index); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); + } + + if (len == 1) { + tcg_out8(s, offset); + } else if (len == 4) { tcg_out32(s, offset); } } -static inline void tcg_out_mov(TCGContext *s, int ret, int arg) +/* A simplification of the above with no index or shift. */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, + int rm, tcg_target_long offset) +{ + tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + /* Propagate an opcode prefix, such as P_REXW. */ + int ext = subop & ~0x7; + subop &= 0x7; + + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) { - if (arg != ret) - tcg_out_modrm(s, 0x8b, ret, arg); + if (arg != ret) { + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? 
P_REXW : 0); + tcg_out_modrm(s, opc, ret, arg); + } } -static inline void tcg_out_movi(TCGContext *s, TCGType type, - int ret, int32_t arg) +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg) { if (arg == 0) { - /* xor r0,r0 */ - tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret); + tgen_arithr(s, ARITH_XOR, ret, ret); + return; + } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + } else if (arg == (int32_t)arg) { + tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); + tcg_out32(s, arg); } else { - tcg_out8(s, 0xb8 + ret); + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); tcg_out32(s, arg); + tcg_out32(s, arg >> 31 >> 1); + } +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ + if (val == (int8_t)val) { + tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); + tcg_out8(s, val); + } else if (val == (int32_t)val) { + tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); + tcg_out32(s, val); + } else { + tcg_abort(); } } +static inline void tcg_out_push(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1, tcg_target_long arg2) { - /* movl */ - tcg_out_modrm_offset(s, 0x8b, ret, arg1, arg2); + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, ret, arg1, arg2); } static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, int arg1, tcg_target_long arg2) { - /* movl */ - tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2); + int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, arg, arg1, arg2); } -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf) +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) { - if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) { - /* inc */ - tcg_out_opc(s, 0x40 + r0); - } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) { - /* dec */ - tcg_out_opc(s, 0x48 + r0); - } else if (val == (int8_t)val) { - tcg_out_modrm(s, 0x83, c, r0); - tcg_out8(s, val); - } else if (c == ARITH_AND && val == 0xffu && r0 < 4) { - /* movzbl */ - tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0); - } else if (c == ARITH_AND && val == 0xffffu) { - /* movzwl */ - tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0); + /* Propagate an opcode prefix, such as P_DATA16. 
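+   The shift sub-opcode occupies only the low three bits (it lands in
+   the ModRM reg field), so everything above bit 2 is treated as a
+   prefix flag and forwarded unchanged.  tcg_out_rolw_8 below relies on
+   this, passing SHIFT_ROL + P_DATA16 to get the 16-bit rotate used for
+   byte swapping.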
*/ + int ext = subopc & ~0x7; + subopc &= 0x7; + + if (count == 1) { + tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); } else { - tcg_out_modrm(s, 0x81, c, r0); + tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); + tcg_out8(s, count); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ + tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ + /* movzbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ + /* movzwl */ + tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsw[lq] */ + tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ + /* 32-bit mov zero extends. */ + tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ + tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, + tcg_target_long val, int cf) +{ + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64) { + rexw = c & -8; + c &= 7; + } + + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int is_inc = (c == ARITH_ADD) ^ (val < 0); + if (TCG_TARGET_REG_BITS == 64) { + /* The single-byte increment encodings are re-tasked as the + REX prefixes. Use the MODRM encoding. */ + tcg_out_modrm(s, OPC_GRP5 + rexw, + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + + if (c == ARITH_AND) { + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t)val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); + return; + } + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; + } + } + + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); + tcg_out8(s, val); + return; + } + if (rexw == 0 || val == (int32_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); tcg_out32(s, val); + return; } + + tcg_abort(); } static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) { - if (val != 0) - tgen_arithi(s, ARITH_ADD, reg, val, 0); + if (val != 0) { + tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); + } } -static void tcg_out_jxx(TCGContext *s, int opc, int label_index) +#undef small /* for mingw build */ + +/* Use SMALL != 0 to force a short forward branch. 
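+   A forced-short branch is the 2-byte opcode+rel8 form, needed where
+   the surrounding code depends on a fixed branch length, as in the
+   jump threading of brcond2 and setcond2 below.  A backward branch
+   that is forced short but lies outside rel8 range hits tcg_abort().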
*/ +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) { int32_t val, val1; TCGLabel *l = &s->labels[label_index]; - + if (l->has_value) { val = l->u.value - (tcg_target_long)s->code_ptr; val1 = val - 2; if ((int8_t)val1 == val1) { - if (opc == -1) - tcg_out8(s, 0xeb); - else - tcg_out8(s, 0x70 + opc); + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } tcg_out8(s, val1); } else { + if (small) { + tcg_abort(); + } if (opc == -1) { - tcg_out8(s, 0xe9); + tcg_out8(s, OPC_JMP_long); tcg_out32(s, val - 5); } else { - tcg_out8(s, 0x0f); - tcg_out8(s, 0x80 + opc); + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); tcg_out32(s, val - 6); } } + } else if (small) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1); + s->code_ptr += 1; } else { if (opc == -1) { - tcg_out8(s, 0xe9); + tcg_out8(s, OPC_JMP_long); } else { - tcg_out8(s, 0x0f); - tcg_out8(s, 0x80 + opc); + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); } tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4); s->code_ptr += 4; } } -static void tcg_out_brcond(TCGContext *s, int cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - int label_index) +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, + int const_arg2, int rexw) { if (const_arg2) { if (arg2 == 0) { /* test r, r */ - tcg_out_modrm(s, 0x85, arg1, arg1); + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); } else { - tgen_arithi(s, ARITH_CMP, arg1, arg2, 0); + tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); } } else { - tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1); + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); } - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index); } +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} +#else /* XXX: we implement it at the target level to avoid having to handle cross basic blocks temporaries */ -static void tcg_out_brcond2(TCGContext *s, - const TCGArg *args, const int *const_args) +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args, int small) { int label_next; label_next = gen_new_label(); switch(args[4]) { case TCG_COND_EQ: - tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next); - tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]); + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], + args[5], small); break; case TCG_COND_NE: - tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]); - tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]); + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + args[5], small); + tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], + args[5], small); break; case TCG_COND_LT: - tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - 
tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_LE: - tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_GT: - tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_GE: - tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_LTU: - tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_LEU: - tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_GTU: - tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); break; case TCG_COND_GEU: - tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]); - tcg_out_jxx(s, JCC_JNE, label_next); - tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]); + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); break; default: tcg_abort(); } tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr); } +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int 
const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + TCGArg new_args[6]; + int label_true, label_over; + + memcpy(new_args, args+1, 5*sizeof(TCGArg)); + + if (args[0] == args[1] || args[0] == args[2] + || (!const_args[3] && args[0] == args[3]) + || (!const_args[4] && args[0] == args[4])) { + /* When the destination overlaps with one of the argument + registers, don't do anything tricky. */ + label_true = gen_new_label(); + label_over = gen_new_label(); + + new_args[5] = label_true; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } else { + /* When the destination does not overlap one of the arguments, + clear the destination first, jump if cond false, and emit an + increment in the true case. This results in smaller code. */ + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + + label_over = gen_new_label(); + new_args[4] = tcg_invert_cond(new_args[4]); + new_args[5] = label_over; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) +{ + tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5; + + if (disp == (int32_t)disp) { + tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest); + tcg_out_modrm(s, OPC_GRP5, + call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); + } +} + +static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest) +{ + tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) +{ + tcg_out_branch(s, 0, dest); +} #if defined(CONFIG_SOFTMMU) @@ -437,647 +975,670 @@ static void *qemu_st_helpers[4] = { __stl_mmu, __stq_mmu, }; -#endif -#ifndef CONFIG_USER_ONLY -#define GUEST_BASE 0 -#endif +/* Perform the TLB load and compare. -/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and - EAX. It will be useful once fixed registers globals are less - common. */ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - int opc) + Inputs: + ADDRLO_IDX contains the index into ARGS of the low part of the + address; the high part of the address is at ADDR_LOW_IDX+1. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + Outputs: + LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) + positions of the displacements of forward jumps to the TLB miss case. + + First argument register is loaded with the low part of the address. 
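+   In C terms the fast path below computes, roughly (illustrative
+   sketch, not literal generated code):
+
+       CPUTLBEntry *e = &env->tlb_table[mem_index]
+           [(addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1)];
+       if ((addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1)))
+           == *(target_ulong *)((char *)e + which)) {
+           host_addr = addr + e->addend;   /* TLB hit */
+       }
+
+   the shift is pre-reduced by CPU_TLB_ENTRY_BITS so the index arrives
+   already scaled to sizeof(CPUTLBEntry).  After the compare, the first
+   argument register is reloaded with the low part of the address.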
+ In the TLB hit case, it has been adjusted as indicated by the TLB + and so is a host address. In the TLB miss case, it continues to + hold a guest address. + + Second argument register is clobbered. */ + +static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, + int mem_index, int s_bits, + const TCGArg *args, + uint8_t **label_ptr, int which) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap; -#if defined(CONFIG_SOFTMMU) - uint8_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint8_t *label3_ptr; -#endif - int addr_reg2; -#endif + const int addrlo = args[addrlo_idx]; + const int r0 = tcg_target_call_iarg_regs[0]; + const int r1 = tcg_target_call_iarg_regs[1]; + TCGType type = TCG_TYPE_I32; + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) { + type = TCG_TYPE_I64; + rexw = P_REXW; + } - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif - mem_index = *args; - s_bits = opc & 3; + tcg_out_mov(s, type, r1, addrlo); + tcg_out_mov(s, type, r0, addrlo); - r0 = TCG_REG_EAX; - r1 = TCG_REG_EDX; + tcg_out_shifti(s, SHIFT_SHR + rexw, r1, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); -#if defined(CONFIG_SOFTMMU) - tcg_out_mov(s, r1, addr_reg); - - tcg_out_mov(s, r0, addr_reg); - - tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */ - tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */ - tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - - tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */ - tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - - tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */ - tcg_out8(s, 0x80 | (r1 << 3) | 0x04); - tcg_out8(s, (5 << 3) | r1); - tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read)); + tgen_arithi(s, ARITH_AND + rexw, r0, + TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + tgen_arithi(s, ARITH_AND + rexw, r1, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0, + offsetof(CPUState, tlb_table[mem_index][0]) + + which); /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); - - tcg_out_mov(s, r0, addr_reg); - -#if TARGET_LONG_BITS == 32 - /* je label1 */ - tcg_out8(s, 0x70 + JCC_JE); - label1_ptr = s->code_ptr; - s->code_ptr++; -#else - /* jne label3 */ - tcg_out8(s, 0x70 + JCC_JNE); - label3_ptr = s->code_ptr; - s->code_ptr++; - - /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); + tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0); + + tcg_out_mov(s, type, r0, addrlo); - /* je label1 */ - tcg_out8(s, 0x70 + JCC_JE); - label1_ptr = s->code_ptr; + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[0] = s->code_ptr; s->code_ptr++; - - /* label3: */ - *label3_ptr = s->code_ptr - label3_ptr - 1; -#endif - /* XXX: move that code at the end of the TB */ -#if TARGET_LONG_BITS == 32 - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EDX, mem_index); -#else - tcg_out_mov(s, TCG_REG_EDX, addr_reg2); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index); -#endif - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_ld_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(r1), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); - switch(opc) { - case 0 | 4: - /* movsbl */ - tcg_out_modrm(s, 
0xbe | P_EXT, data_reg, TCG_REG_EAX); - break; - case 1 | 4: - /* movswl */ - tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX); - break; - case 0: - /* movzbl */ - tcg_out_modrm(s, 0xb6 | P_EXT, data_reg, TCG_REG_EAX); - break; - case 1: - /* movzwl */ - tcg_out_modrm(s, 0xb7 | P_EXT, data_reg, TCG_REG_EAX); - break; - case 2: - default: - tcg_out_mov(s, data_reg, TCG_REG_EAX); - break; - case 3: - if (data_reg == TCG_REG_EDX) { - tcg_out_opc(s, 0x90 + TCG_REG_EDX); /* xchg %edx, %eax */ - tcg_out_mov(s, data_reg2, TCG_REG_EAX); - } else { - tcg_out_mov(s, data_reg, TCG_REG_EAX); - tcg_out_mov(s, data_reg2, TCG_REG_EDX); - } - break; + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[1] = s->code_ptr; + s->code_ptr++; } - /* jmp label2 */ - tcg_out8(s, 0xeb); - label2_ptr = s->code_ptr; - s->code_ptr++; - - /* label1: */ - *label1_ptr = s->code_ptr - label1_ptr - 1; + /* TLB Hit. */ - /* add x(r1), r0 */ - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_read)); -#else - r0 = addr_reg; + /* add addend(r1), r0 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1, + offsetof(CPUTLBEntry, addend) - which); +} #endif +static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ #ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; + const int bswap = 1; #else - bswap = 0; + const int bswap = 0; #endif - switch(opc) { + switch (sizeop) { case 0: - /* movzbl */ - tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, GUEST_BASE); + tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs); break; case 0 | 4: - /* movsbl */ - tcg_out_modrm_offset(s, 0xbe | P_EXT, data_reg, r0, GUEST_BASE); + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs); break; case 1: - /* movzwl */ - tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, GUEST_BASE); + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); if (bswap) { - /* rolw $8, data_reg */ - tcg_out8(s, 0x66); - tcg_out_modrm(s, 0xc1, 0, data_reg); - tcg_out8(s, 8); + tcg_out_rolw_8(s, datalo); } break; case 1 | 4: - /* movswl */ - tcg_out_modrm_offset(s, 0xbf | P_EXT, data_reg, r0, GUEST_BASE); if (bswap) { - /* rolw $8, data_reg */ - tcg_out8(s, 0x66); - tcg_out_modrm(s, 0xc1, 0, data_reg); - tcg_out8(s, 8); - - /* movswl data_reg, data_reg */ - tcg_out_modrm(s, 0xbf | P_EXT, data_reg, data_reg); + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + tcg_out_rolw_8(s, datalo); + tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs); } break; case 2: - /* movl (r0), data_reg */ - tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE); + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); if (bswap) { - /* bswap */ - tcg_out_opc(s, (0xc8 + data_reg) | P_EXT); + tcg_out_bswap32(s, datalo); } break; - case 3: - /* XXX: could be nicer */ - if (r0 == data_reg) { - r1 = TCG_REG_EDX; - if (r1 == data_reg) - r1 = TCG_REG_EAX; - tcg_out_mov(s, r1, r0); - r0 = r1; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + if (bswap) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_bswap32(s, datalo); + tcg_out_ext32s(s, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs); } - if (!bswap) { - tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE); - tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4); + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_ld(s, TCG_TYPE_I64, datalo, base, 
ofs); + if (bswap) { + tcg_out_bswap64(s, datalo); + } } else { - tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4); - tcg_out_opc(s, (0xc8 + data_reg) | P_EXT); - - tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE); - /* bswap */ - tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT); + if (bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + if (base != datalo) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + } else { + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + } + if (bswap) { + tcg_out_bswap32(s, datalo); + tcg_out_bswap32(s, datahi); + } } break; default: tcg_abort(); } - -#if defined(CONFIG_SOFTMMU) - /* label2: */ - *label2_ptr = s->code_ptr - label2_ptr - 1; -#endif } - -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and + EAX. It will be useful once fixed registers globals are less + common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap; + int data_reg, data_reg2 = 0; + int addrlo_idx; #if defined(CONFIG_SOFTMMU) - uint8_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint8_t *label3_ptr; -#endif - int addr_reg2; + int mem_index, s_bits, arg_idx; + uint8_t *label_ptr[3]; #endif - data_reg = *args++; - if (opc == 3) - data_reg2 = *args++; - else - data_reg2 = 0; - addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif - mem_index = *args; + data_reg = args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } - s_bits = opc; +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc & 3; - r0 = TCG_REG_EAX; - r1 = TCG_REG_EDX; + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_read)); -#if defined(CONFIG_SOFTMMU) - tcg_out_mov(s, r1, addr_reg); - - tcg_out_mov(s, r0, addr_reg); - - tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */ - tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */ - tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - - tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */ - tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); - - tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */ - tcg_out8(s, 0x80 | (r1 << 3) | 0x04); - tcg_out8(s, (5 << 3) | r1); - tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write)); + /* TLB Hit. */ + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); - /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); - - tcg_out_mov(s, r0, addr_reg); - -#if TARGET_LONG_BITS == 32 - /* je label1 */ - tcg_out8(s, 0x70 + JCC_JE); - label1_ptr = s->code_ptr; - s->code_ptr++; -#else - /* jne label3 */ - tcg_out8(s, 0x70 + JCC_JNE); - label3_ptr = s->code_ptr; + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; s->code_ptr++; - - /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); - /* je label1 */ - tcg_out8(s, 0x70 + JCC_JE); - label1_ptr = s->code_ptr; - s->code_ptr++; - - /* label3: */ - *label3_ptr = s->code_ptr - label3_ptr - 1; -#endif + /* TLB Miss. 
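+   Both forward short jumps from the compare(s) above land here; the
+   rel8 displacement bytes reserved earlier are back-patched now that
+   the target is known.  The -1 in the patch accounts for the
+   displacement byte itself, a short jump being relative to the end of
+   its own 2-byte instruction.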
*/ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } /* XXX: move that code at the end of the TB */ -#if TARGET_LONG_BITS == 32 - if (opc == 3) { - tcg_out_mov(s, TCG_REG_EDX, data_reg); - tcg_out_mov(s, TCG_REG_ECX, data_reg2); - tcg_out8(s, 0x6a); /* push Ib */ - tcg_out8(s, mem_index); - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); - tcg_out_addi(s, TCG_REG_ESP, 4); - } else { - switch(opc) { - case 0: - /* movzbl */ - tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_EDX, data_reg); - break; - case 1: - /* movzwl */ - tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_EDX, data_reg); - break; - case 2: - tcg_out_mov(s, TCG_REG_EDX, data_reg); - break; - } - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index); - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); + /* The first argument is already loaded with addrlo. */ + arg_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++], + args[addrlo_idx + 1]); } -#else - if (opc == 3) { - tcg_out_mov(s, TCG_REG_EDX, addr_reg2); - tcg_out8(s, 0x6a); /* push Ib */ - tcg_out8(s, mem_index); - tcg_out_opc(s, 0x50 + data_reg2); /* push */ - tcg_out_opc(s, 0x50 + data_reg); /* push */ - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); - tcg_out_addi(s, TCG_REG_ESP, 12); - } else { - tcg_out_mov(s, TCG_REG_EDX, addr_reg2); - switch(opc) { - case 0: - /* movzbl */ - tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_ECX, data_reg); - break; - case 1: - /* movzwl */ - tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_ECX, data_reg); - break; - case 2: - tcg_out_mov(s, TCG_REG_ECX, data_reg); - break; + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], + mem_index); + tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); + + switch(opc) { + case 0 | 4: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 1 | 4: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 0: + tcg_out_ext8u(s, data_reg, TCG_REG_EAX); + break; + case 1: + tcg_out_ext16u(s, data_reg, TCG_REG_EAX); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + break; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); + } else if (data_reg == TCG_REG_EDX) { + /* xchg %edx, %eax */ + tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX); } - tcg_out8(s, 0x6a); /* push Ib */ - tcg_out8(s, mem_index); - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); - tcg_out_addi(s, TCG_REG_ESP, 4); + break; + default: + tcg_abort(); } -#endif - - /* jmp label2 */ - tcg_out8(s, 0xeb); - label2_ptr = s->code_ptr; - s->code_ptr++; - - /* label1: */ - *label1_ptr = s->code_ptr - label1_ptr - 1; - /* add x(r1), r0 */ - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_write)); + /* label2: */ + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; #else - 
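The size code threaded through these loads packs log2(access size) into bits 0..1 and a sign-extension flag into bit 2, which is why the case labels read 0|4 and 1|4. After the helper call the result sits in EAX (RAX for 64-bit) and the switch widens it into the destination register. A small C model of the decode, for orientation only:

    #include <stdint.h>

    static int64_t widen_result(uint32_t raw, int opc)
    {
        int size = opc & 3;   /* 0: 8-bit, 1: 16-bit, 2: 32-bit, 3: 64-bit */
        int sign = opc & 4;
        switch (size) {
        case 0:  return sign ? (int8_t)raw  : (uint8_t)raw;
        case 1:  return sign ? (int16_t)raw : (uint16_t)raw;
        case 2:  return sign ? (int32_t)raw : raw;
        default: return raw;  /* 64-bit results need no widening */
        }
    }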
r0 = addr_reg; + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. */ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc); + } #endif +} +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ #ifdef TARGET_WORDS_BIGENDIAN - bswap = 1; + const int bswap = 1; #else - bswap = 0; + const int bswap = 0; #endif - switch(opc) { + /* ??? Ideally we wouldn't need a scratch register. For user-only, + we could perform the bswap twice to restore the original value + instead of moving to the scratch. But as it is, the L constraint + means that the second argument reg is definitely free here. */ + int scratch = tcg_target_call_iarg_regs[1]; + + switch (sizeop) { case 0: - /* movb */ - tcg_out_modrm_offset(s, 0x88, data_reg, r0, GUEST_BASE); + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs); break; case 1: if (bswap) { - tcg_out_mov(s, r1, data_reg); - tcg_out8(s, 0x66); /* rolw $8, %ecx */ - tcg_out_modrm(s, 0xc1, 0, r1); - tcg_out8(s, 8); - data_reg = r1; + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_rolw_8(s, scratch); + datalo = scratch; } - /* movw */ - tcg_out8(s, 0x66); - tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs); break; case 2: if (bswap) { - tcg_out_mov(s, r1, data_reg); - /* bswap data_reg */ - tcg_out_opc(s, (0xc8 + r1) | P_EXT); - data_reg = r1; + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + datalo = scratch; } - /* movl */ - tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE); + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); break; case 3: - if (bswap) { - tcg_out_mov(s, r1, data_reg2); - /* bswap data_reg */ - tcg_out_opc(s, (0xc8 + r1) | P_EXT); - tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE); - tcg_out_mov(s, r1, data_reg); - /* bswap data_reg */ - tcg_out_opc(s, (0xc8 + r1) | P_EXT); - tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4); + if (TCG_TARGET_REG_BITS == 64) { + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); + tcg_out_bswap64(s, scratch); + datalo = scratch; + } + tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs); + } else if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs); + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4); } else { - tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE); - tcg_out_modrm_offset(s, 0x89, data_reg2, r0, GUEST_BASE + 4); + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4); } break; default: tcg_abort(); } +} +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, + int opc) +{ + int data_reg, data_reg2 = 0; + int addrlo_idx; #if defined(CONFIG_SOFTMMU) + int mem_index, s_bits; + int stack_adjust; + uint8_t *label_ptr[3]; +#endif + + data_reg = 
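The user-only path's `offset != GUEST_BASE` test is terser than it looks: GUEST_BASE is first narrowed into a 32-bit displacement, and the comparison detects whether the constant survived, that is, whether it fits the addressing mode; if not, it is materialized in RDI and added to the base register. Standalone C rendering of the test (narrowing an out-of-range value is implementation-defined in C, matching how the generated code relies on it):

    #include <stdint.h>

    static int fits_disp32(int64_t guest_base)
    {
        int32_t offset = (int32_t)guest_base;  /* candidate displacement */
        return offset == guest_base;           /* survived the narrowing? */
    }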
args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } + +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc; + + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); + + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; + s->code_ptr++; + + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } + + /* XXX: move that code at the end of the TB */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), + TCG_REG_RSI, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index); + stack_adjust = 0; + } else if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg); + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2); + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index); + stack_adjust = 0; + } + } else { + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); + tcg_out_pushi(s, mem_index); + tcg_out_push(s, data_reg2); + tcg_out_push(s, data_reg); + stack_adjust = 12; + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); + switch(opc) { + case 0: + tcg_out_ext8u(s, TCG_REG_ECX, data_reg); + break; + case 1: + tcg_out_ext16u(s, TCG_REG_ECX, data_reg); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg); + break; + } + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } + } + + tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); + + if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { + /* Pop and discard. This is 2 bytes smaller than the add. */ + tcg_out_pop(s, TCG_REG_ECX); + } else if (stack_adjust != 0) { + tcg_out_addi(s, TCG_REG_ESP, stack_adjust); + } + /* label2: */ - *label2_ptr = s->code_ptr - label2_ptr - 1; + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#else + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. 
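The store slow path marshals arguments per ABI: everything in registers on a 64-bit host, partly on the stack for the 32-bit cases, with `stack_adjust` recording what must be removed after the call. The cleanup special case rests on encoding sizes: pop r32 is one byte while add $imm8, %esp is three, hence "2 bytes smaller". Sketch of the decision:

    /* Choose the cheaper post-call stack cleanup; byte counts are the
       32-bit x86 encodings (pop r32: 1 byte, add $imm8,%esp: 3 bytes). */
    static int cleanup_with_pop(int stack_adjust, int reg_bits)
    {
        return stack_adjust == reg_bits / 8;   /* exactly one word pushed */
    }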
*/ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc); + } #endif } -static inline void tcg_out_op(TCGContext *s, int opc, +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { - int c; - + int c, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + rexw = P_REXW; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32) +#endif + switch(opc) { case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]); - tcg_out8(s, 0xe9); /* jmp tb_ret_addr */ - tcg_out32(s, tb_ret_addr - s->code_ptr - 4); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); + tcg_out_jmp(s, (tcg_target_long) tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ - tcg_out8(s, 0xe9); /* jmp im */ + tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; tcg_out32(s, 0); } else { /* indirect jump method */ - /* jmp Ev */ - tcg_out_modrm_offset(s, 0xff, 4, -1, + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, (tcg_target_long)(s->tb_next + args[0])); } s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; break; case INDEX_op_call: if (const_args[0]) { - tcg_out8(s, 0xe8); - tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4); + tcg_out_calli(s, args[0]); } else { - tcg_out_modrm(s, 0xff, 2, args[0]); + /* call *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]); } break; case INDEX_op_jmp: if (const_args[0]) { - tcg_out8(s, 0xe9); - tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4); + tcg_out_jmp(s, args[0]); } else { - tcg_out_modrm(s, 0xff, 4, args[0]); + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); } break; case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, args[0]); + tcg_out_jxx(s, JCC_JMP, args[0], 0); break; case INDEX_op_movi_i32: tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); break; - case INDEX_op_ld8u_i32: - /* movzbl */ - tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]); + OP_32_64(ld8u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); break; - case INDEX_op_ld8s_i32: - /* movsbl */ - tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]); + OP_32_64(ld8s): + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); break; - case INDEX_op_ld16u_i32: - /* movzwl */ - tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]); + OP_32_64(ld16u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. 
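The OP_32_64() macro introduced here is what lets one case body serve both operand widths: on 64-bit hosts the _i64 label sets the REX.W flag and falls through into the _i32 label. A self-contained demo of the same pattern; the enum values and P_REXW constant below are invented for the demo.

    #include <stdio.h>

    enum { op_neg_i32, op_neg_i64 };
    #define P_REXW 0x8000

    #define OP_32_64(x) \
        case op_##x##_i64: rexw = P_REXW; /* FALLTHRU */ \
        case op_##x##_i32

    static void emit(int opc)
    {
        int rexw = 0;
        switch (opc) {
        OP_32_64(neg):
            printf("neg, rexw=%#x\n", rexw);
            break;
        }
    }

    int main(void) { emit(op_neg_i32); emit(op_neg_i64); return 0; }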
*/ + tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); break; - case INDEX_op_ld16s_i32: - /* movswl */ - tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]); + OP_32_64(ld16s): + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld32u_i64: +#endif case INDEX_op_ld_i32: - /* movl */ - tcg_out_modrm_offset(s, 0x8b, args[0], args[1], args[2]); + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_st8_i32: - /* movb */ - tcg_out_modrm_offset(s, 0x88, args[0], args[1], args[2]); + + OP_32_64(st8): + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); break; - case INDEX_op_st16_i32: - /* movw */ - tcg_out8(s, 0x66); - tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]); + OP_32_64(st16): + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_st32_i64: +#endif case INDEX_op_st_i32: - /* movl */ - tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]); + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_sub_i32: + + OP_32_64(add): + /* For 3-operand addition, use LEA. */ + if (args[0] != args[1]) { + TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + + if (const_args[2]) { + c3 = a2, a2 = -1; + } else if (a0 == a2) { + /* Watch out for dest = src + dest, since we've removed + the matching constraint on the add. */ + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + break; + } + + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); + break; + } + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): c = ARITH_SUB; goto gen_arith; - case INDEX_op_and_i32: + OP_32_64(and): c = ARITH_AND; goto gen_arith; - case INDEX_op_or_i32: + OP_32_64(or): c = ARITH_OR; goto gen_arith; - case INDEX_op_xor_i32: + OP_32_64(xor): c = ARITH_XOR; goto gen_arith; - case INDEX_op_add_i32: - c = ARITH_ADD; gen_arith: if (const_args[2]) { - tgen_arithi(s, c, args[0], args[2], 0); + tgen_arithi(s, c + rexw, args[0], args[2], 0); } else { - tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]); + tgen_arithr(s, c + rexw, args[0], args[2]); } break; - case INDEX_op_mul_i32: + + OP_32_64(mul): if (const_args[2]) { int32_t val; val = args[2]; if (val == (int8_t)val) { - tcg_out_modrm(s, 0x6b, args[0], args[0]); + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); tcg_out8(s, val); } else { - tcg_out_modrm(s, 0x69, args[0], args[0]); + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); tcg_out32(s, val); } } else { - tcg_out_modrm(s, 0xaf | P_EXT, args[0], args[2]); + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); } break; - case INDEX_op_mulu2_i32: - tcg_out_modrm(s, 0xf7, 4, args[3]); - break; - case INDEX_op_div2_i32: - tcg_out_modrm(s, 0xf7, 7, args[4]); + + OP_32_64(div2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); break; - case INDEX_op_divu2_i32: - tcg_out_modrm(s, 0xf7, 6, args[4]); + OP_32_64(divu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); break; - case INDEX_op_shl_i32: + + OP_32_64(shl): c = SHIFT_SHL; - gen_shift32: - if (const_args[2]) { - if (args[2] == 1) { - tcg_out_modrm(s, 0xd1, c, args[0]); - } else { - tcg_out_modrm(s, 0xc1, c, args[0]); - tcg_out8(s, args[2]); - } - } else { - tcg_out_modrm(s, 0xd3, c, args[0]); - } - break; - case INDEX_op_shr_i32: + goto gen_shift; + OP_32_64(shr): c = SHIFT_SHR; - goto gen_shift32; - case INDEX_op_sar_i32: + goto gen_shift; + 
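The rewritten add case leans on LEA as a non-destructive three-operand add: lea c(a1,a2), a0 computes a0 = a1 + a2 + c without requiring the destination to alias an input, which is what allows the add_i32 constraint further down to relax from "0" to "r". The a0 == a2 register case is peeled off and emitted as a plain ADD of a1 into a0, which produces the same value. What LEA computes, in C:

    #include <stdint.h>

    /* lea disp(base,index), dest  ==  dest = base + index + disp;
       no flags are written and dest may be any register */
    static uint64_t lea(uint64_t base, uint64_t index, int32_t disp)
    {
        return base + index + (int64_t)disp;
    }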
OP_32_64(sar): c = SHIFT_SAR; - goto gen_shift32; - case INDEX_op_rotl_i32: + goto gen_shift; + OP_32_64(rotl): c = SHIFT_ROL; - goto gen_shift32; - case INDEX_op_rotr_i32: + goto gen_shift; + OP_32_64(rotr): c = SHIFT_ROR; - goto gen_shift32; - - case INDEX_op_add2_i32: - if (const_args[4]) - tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]); - if (const_args[5]) - tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]); - break; - case INDEX_op_sub2_i32: - if (const_args[4]) - tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]); - if (const_args[5]) - tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); - else - tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]); + goto gen_shift; + gen_shift: + if (const_args[2]) { + tcg_out_shifti(s, c + rexw, args[0], args[2]); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); + } break; + case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]); + tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], + args[3], 0); break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args); + case INDEX_op_setcond_i32: + tcg_out_setcond32(s, args[3], args[0], args[1], + args[2], const_args[2]); break; - case INDEX_op_bswap16_i32: - tcg_out8(s, 0x66); - tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]); - tcg_out8(s, 8); + OP_32_64(bswap16): + tcg_out_rolw_8(s, args[0]); break; - case INDEX_op_bswap32_i32: - tcg_out_opc(s, (0xc8 + args[0]) | P_EXT); + OP_32_64(bswap32): + tcg_out_bswap32(s, args[0]); break; - case INDEX_op_neg_i32: - tcg_out_modrm(s, 0xf7, 3, args[0]); + OP_32_64(neg): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); break; - - case INDEX_op_not_i32: - tcg_out_modrm(s, 0xf7, 2, args[0]); + OP_32_64(not): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); break; - case INDEX_op_ext8s_i32: - tcg_out_modrm(s, 0xbe | P_EXT, args[0], args[1]); + OP_32_64(ext8s): + tcg_out_ext8s(s, args[0], args[1], rexw); break; - case INDEX_op_ext16s_i32: - tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]); + OP_32_64(ext16s): + tcg_out_ext16s(s, args[0], args[1], rexw); break; - case INDEX_op_ext8u_i32: - tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]); + OP_32_64(ext8u): + tcg_out_ext8u(s, args[0], args[1]); break; - case INDEX_op_ext16u_i32: - tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]); + OP_32_64(ext16u): + tcg_out_ext16u(s, args[0], args[1]); break; case INDEX_op_qemu_ld8u: @@ -1092,13 +1653,16 @@ static inline void tcg_out_op(TCGContext *s, int opc, case INDEX_op_qemu_ld16s: tcg_out_qemu_ld(s, args, 1 | 4); break; +#if TCG_TARGET_REG_BITS == 64 case INDEX_op_qemu_ld32u: +#endif + case INDEX_op_qemu_ld32: tcg_out_qemu_ld(s, args, 2); break; case INDEX_op_qemu_ld64: tcg_out_qemu_ld(s, args, 3); break; - + case INDEX_op_qemu_st8: tcg_out_qemu_st(s, args, 0); break; @@ -1112,9 +1676,82 @@ static inline void tcg_out_op(TCGContext *s, int opc, tcg_out_qemu_st(s, args, 3); break; +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; + case INDEX_op_mulu2_i32: + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]); + break; + case INDEX_op_add2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); + } else { + tgen_arithr(s, 
ARITH_ADD, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_ADC, args[1], args[5]); + } + break; + case INDEX_op_sub2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_SUB, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_SBB, args[1], args[5]); + } + break; +#else /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_movi_i64: + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ld32s_i64: + tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, 2 | 4); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], + args[3], 0); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond64(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0]); + break; + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, args[0], args[1]); + break; +#endif + default: tcg_abort(); } + +#undef OP_32_64 } static const TCGTargetOpDef x86_op_defs[] = { @@ -1134,10 +1771,9 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_st16_i32, { "r", "r" } }, { INDEX_op_st_i32, { "r", "r" } }, - { INDEX_op_add_i32, { "r", "0", "ri" } }, + { INDEX_op_add_i32, { "r", "r", "ri" } }, { INDEX_op_sub_i32, { "r", "0", "ri" } }, { INDEX_op_mul_i32, { "r", "0", "ri" } }, - { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, { INDEX_op_and_i32, { "r", "0", "ri" } }, @@ -1152,10 +1788,6 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_brcond_i32, { "r", "ri" } }, - { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, - { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, - { INDEX_op_bswap16_i32, { "r", "0" } }, { INDEX_op_bswap32_i32, { "r", "0" } }, @@ -1165,15 +1797,84 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext8s_i32, { "r", "q" } }, { INDEX_op_ext16s_i32, { "r", "r" } }, - { INDEX_op_ext8u_i32, { "r", "q"} }, - { INDEX_op_ext16u_i32, { "r", "r"} }, + { INDEX_op_ext8u_i32, { "r", "q" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_setcond_i32, { "q", "r", "ri" } }, -#if TARGET_LONG_BITS == 32 +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else + { INDEX_op_mov_i64, { "r", "r" } }, + { INDEX_op_movi_i64, { "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, 
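The 32-bit-only block lowers 64-bit adds and subtracts to an ADD/ADC (or SUB/SBB) pair over the half-words; the second instruction consumes the carry or borrow produced by the first. Portable C model of the carry chain:

    #include <stdint.h>

    static void add2_i32(uint32_t *rl, uint32_t *rh, uint32_t bl, uint32_t bh)
    {
        uint32_t old_lo = *rl;
        *rl += bl;                      /* ADD: may wrap */
        *rh += bh + (*rl < old_lo);     /* ADC: fold in the carry out */
    }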
{ "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "0", "re" } }, + { INDEX_op_mul_i64, { "r", "0", "re" } }, + { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "re" } }, + { INDEX_op_and_i64, { "r", "0", "reZ" } }, + { INDEX_op_or_i64, { "r", "0", "re" } }, + { INDEX_op_xor_i64, { "r", "0", "re" } }, + + { INDEX_op_shl_i64, { "r", "0", "ci" } }, + { INDEX_op_shr_i64, { "r", "0", "ci" } }, + { INDEX_op_sar_i64, { "r", "0", "ci" } }, + { INDEX_op_rotl_i64, { "r", "0", "ci" } }, + { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i64, { "r", "re" } }, + { INDEX_op_setcond_i64, { "r", "r", "re" } }, + + { INDEX_op_bswap16_i64, { "r", "0" } }, + { INDEX_op_bswap32_i64, { "r", "0" } }, + { INDEX_op_bswap64_i64, { "r", "0" } }, + { INDEX_op_neg_i64, { "r", "0" } }, + { INDEX_op_not_i64, { "r", "0" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 { INDEX_op_qemu_ld8u, { "r", "L" } }, { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld64, { "r", "r", "L" } }, { INDEX_op_qemu_st8, { "cb", "L" } }, @@ -1185,7 +1886,7 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, { INDEX_op_qemu_st8, { "cb", "L", "L" } }, @@ -1197,64 +1898,85 @@ static const TCGTargetOpDef x86_op_defs[] = { }; static int tcg_target_callee_save_regs[] = { - /* TCG_REG_EBP, */ /* currently used for the global env, so no - need to save */ +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, + TCG_REG_R12, + TCG_REG_R13, + /* TCG_REG_R14, */ /* Currently used for the global env. */ + TCG_REG_R15, +#else + /* TCG_REG_EBP, */ /* Currently used for the global env. */ TCG_REG_EBX, TCG_REG_ESI, TCG_REG_EDI, +#endif }; -static inline void tcg_out_push(TCGContext *s, int reg) -{ - tcg_out_opc(s, 0x50 + reg); -} - -static inline void tcg_out_pop(TCGContext *s, int reg) -{ - tcg_out_opc(s, 0x58 + reg); -} - /* Generate global QEMU prologue and epilogue code */ -void tcg_target_qemu_prologue(TCGContext *s) +static void tcg_target_qemu_prologue(TCGContext *s) { int i, frame_size, push_size, stack_addend; - + /* TB prologue */ - /* save all callee saved registers */ - for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + + /* Save all callee saved registers. 
*/ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { tcg_out_push(s, tcg_target_callee_save_regs[i]); } - /* reserve some stack space */ - push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4; + + /* Reserve some stack space. */ + push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs); + push_size *= TCG_TARGET_REG_BITS / 8; + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE; - frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); stack_addend = frame_size - push_size; tcg_out_addi(s, TCG_REG_ESP, -stack_addend); - tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */ - + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]); + /* TB epilogue */ tb_ret_addr = s->code_ptr; + tcg_out_addi(s, TCG_REG_ESP, stack_addend); - for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { + + for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { tcg_out_pop(s, tcg_target_callee_save_regs[i]); } - tcg_out8(s, 0xc3); /* ret */ + tcg_out_opc(s, OPC_RET, 0, 0, 0); } -void tcg_target_init(TCGContext *s) +static void tcg_target_init(TCGContext *s) { +#if !defined(CONFIG_USER_ONLY) /* fail safe */ if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) tcg_abort(); +#endif + + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + } else { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); + } + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + } - tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); - tcg_regset_set32(tcg_target_call_clobber_regs, 0, - (1 << TCG_REG_EAX) | - (1 << TCG_REG_EDX) | - (1 << TCG_REG_ECX)); - tcg_regset_clear(s->reserved_regs); tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP); diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index f97034c..a869cf5 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -23,10 +23,18 @@ */ #define TCG_TARGET_I386 1 +#if defined(__x86_64__) +# define TCG_TARGET_REG_BITS 64 +#else #define TCG_TARGET_REG_BITS 32 +#endif //#define TCG_TARGET_WORDS_BIGENDIAN +#if TCG_TARGET_REG_BITS == 64 +# define TCG_TARGET_NB_REGS 16 +#else #define TCG_TARGET_NB_REGS 8 +#endif enum { TCG_REG_EAX = 0, @@ -37,14 +45,37 @@ enum { TCG_REG_EBP, TCG_REG_ESI, TCG_REG_EDI, + + /* 64-bit registers; always define the symbols to avoid + too much if-deffing. 
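The prologue arithmetic keeps the stack pointer aligned for helper calls: push_size counts the return address (the leading 1) plus each saved register, one host word apiece, and the frame is rounded up to TCG_TARGET_STACK_ALIGN with the usual mask trick. The same computation as a standalone function:

    static long stack_addend(int saved_regs, int reg_bytes,
                             int call_args_size, int align)
    {
        long push_size  = (1 + saved_regs) * reg_bytes; /* +1: return address */
        long frame_size = push_size + call_args_size;
        frame_size = (frame_size + align - 1) & -(long)align;
        return frame_size - push_size;  /* what gets subtracted from %esp */
    }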
*/ + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_RAX = TCG_REG_EAX, + TCG_REG_RCX = TCG_REG_ECX, + TCG_REG_RDX = TCG_REG_EDX, + TCG_REG_RBX = TCG_REG_EBX, + TCG_REG_RSP = TCG_REG_ESP, + TCG_REG_RBP = TCG_REG_EBP, + TCG_REG_RSI = TCG_REG_ESI, + TCG_REG_RDI = TCG_REG_EDI, }; +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 + /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET 0 /* optional instructions */ +#define TCG_TARGET_HAS_div2_i32 #define TCG_TARGET_HAS_rot_i32 #define TCG_TARGET_HAS_ext8s_i32 #define TCG_TARGET_HAS_ext16s_i32 @@ -56,13 +87,43 @@ enum { #define TCG_TARGET_HAS_not_i32 // #define TCG_TARGET_HAS_andc_i32 // #define TCG_TARGET_HAS_orc_i32 +// #define TCG_TARGET_HAS_eqv_i32 +// #define TCG_TARGET_HAS_nand_i32 +// #define TCG_TARGET_HAS_nor_i32 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_div2_i64 +#define TCG_TARGET_HAS_rot_i64 +#define TCG_TARGET_HAS_ext8s_i64 +#define TCG_TARGET_HAS_ext16s_i64 +#define TCG_TARGET_HAS_ext32s_i64 +#define TCG_TARGET_HAS_ext8u_i64 +#define TCG_TARGET_HAS_ext16u_i64 +#define TCG_TARGET_HAS_ext32u_i64 +#define TCG_TARGET_HAS_bswap16_i64 +#define TCG_TARGET_HAS_bswap32_i64 +#define TCG_TARGET_HAS_bswap64_i64 +#define TCG_TARGET_HAS_neg_i64 +#define TCG_TARGET_HAS_not_i64 +// #define TCG_TARGET_HAS_andc_i64 +// #define TCG_TARGET_HAS_orc_i64 +// #define TCG_TARGET_HAS_eqv_i64 +// #define TCG_TARGET_HAS_nand_i64 +// #define TCG_TARGET_HAS_nor_i64 +#endif #define TCG_TARGET_HAS_GUEST_BASE /* Note: must be synced with dyngen-exec.h */ +#if TCG_TARGET_REG_BITS == 64 +#define TCG_AREG0 TCG_REG_R14 +#define TCG_AREG1 TCG_REG_R15 +#define TCG_AREG2 TCG_REG_R12 +#else #define TCG_AREG0 TCG_REG_EBP #define TCG_AREG1 TCG_REG_EBX #define TCG_AREG2 TCG_REG_ESI +#endif static inline void flush_icache_range(unsigned long start, unsigned long stop) { diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 96cc461..7970268 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -36,11 +36,6 @@ static uint8_t *tb_ret_addr; #endif #define FAST_PATH -#if TARGET_PHYS_ADDR_BITS <= 32 -#define ADDEND_OFFSET 0 -#else -#define ADDEND_OFFSET 4 -#endif #ifndef GUEST_BASE #define GUEST_BASE 0 @@ -56,7 +51,7 @@ static uint8_t *tb_ret_addr; static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "r0", "r1", - "rp", + "r2", "r3", "r4", "r5", @@ -328,10 +323,12 @@ static int tcg_target_const_match(tcg_target_long val, #define MULLI OPCD( 7) #define CMPLI OPCD(10) #define CMPI OPCD(11) +#define SUBFIC OPCD( 8) #define LWZU OPCD(33) #define STWU OPCD(37) +#define RLWIMI OPCD(20) #define RLWINM OPCD(21) #define RLWNM OPCD(23) @@ -373,6 +370,8 @@ static int tcg_target_const_match(tcg_target_long val, #define NOR XO31(124) #define ANDC XO31( 60) #define ORC XO31(412) +#define EQV XO31(284) +#define NAND XO31(476) #define LBZX XO31( 87) #define LHZX XO31(279) @@ -438,7 +437,7 @@ static const uint32_t tcg_to_bc[10] = { [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, }; -static void tcg_out_mov(TCGContext *s, int ret, int arg) +static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) { tcg_out32 (s, OR | SAB (arg, ret, arg)); } @@ -592,11 +591,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) /* slow path */ #if TARGET_LONG_BITS == 32 - tcg_out_mov (s, 3, addr_reg); + tcg_out_mov (s, TCG_TYPE_I32, 3, 
addr_reg); tcg_out_movi (s, TCG_TYPE_I32, 4, mem_index); #else - tcg_out_mov (s, 3, addr_reg2); - tcg_out_mov (s, 4, addr_reg); + tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg2); + tcg_out_mov (s, TCG_TYPE_I32, 4, addr_reg); tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index); #endif @@ -612,23 +611,23 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) case 1: case 2: if (data_reg != 3) - tcg_out_mov (s, data_reg, 3); + tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3); break; case 3: if (data_reg == 3) { if (data_reg2 == 4) { - tcg_out_mov (s, 0, 4); - tcg_out_mov (s, 4, 3); - tcg_out_mov (s, 3, 0); + tcg_out_mov (s, TCG_TYPE_I32, 0, 4); + tcg_out_mov (s, TCG_TYPE_I32, 4, 3); + tcg_out_mov (s, TCG_TYPE_I32, 3, 0); } else { - tcg_out_mov (s, data_reg2, 3); - tcg_out_mov (s, 3, 4); + tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); + tcg_out_mov (s, TCG_TYPE_I32, 3, 4); } } else { - if (data_reg != 4) tcg_out_mov (s, data_reg, 4); - if (data_reg2 != 3) tcg_out_mov (s, data_reg2, 3); + if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4); + if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3); } break; } @@ -644,7 +643,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) tcg_out32 (s, (LWZ | RT (r0) | RA (r0) - | (ADDEND_OFFSET + offsetof (CPUTLBEntry, addend) + | (offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read)) )); /* r0 = env->tlb_table[mem_index][index].addend */ @@ -706,7 +705,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) if (r0 == data_reg2) { tcg_out32 (s, LWZ | RT (0) | RA (r0)); tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4); - tcg_out_mov (s, data_reg2, 0); + tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0); } else { tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0)); @@ -788,11 +787,11 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) /* slow path */ #if TARGET_LONG_BITS == 32 - tcg_out_mov (s, 3, addr_reg); + tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg); ir = 4; #else - tcg_out_mov (s, 3, addr_reg2); - tcg_out_mov (s, 4, addr_reg); + tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg2); + tcg_out_mov (s, TCG_TYPE_I32, 4, addr_reg); #ifdef TCG_TARGET_CALL_ALIGN_ARGS ir = 5; #else @@ -818,14 +817,14 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) | ME (31))); break; case 2: - tcg_out_mov (s, ir, data_reg); + tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); break; case 3: #ifdef TCG_TARGET_CALL_ALIGN_ARGS ir = 5; #endif - tcg_out_mov (s, ir++, data_reg2); - tcg_out_mov (s, ir, data_reg); + tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2); + tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg); break; } ir++; @@ -843,7 +842,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) tcg_out32 (s, (LWZ | RT (r0) | RA (r0) - | (ADDEND_OFFSET + offsetof (CPUTLBEntry, addend) + | (offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write)) )); /* r0 = env->tlb_table[mem_index][index].addend */ @@ -901,7 +900,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) #endif } -void tcg_target_qemu_prologue (TCGContext *s) +static void tcg_target_qemu_prologue (TCGContext *s) { int i, frame_size; @@ -934,7 +933,10 @@ void tcg_target_qemu_prologue (TCGContext *s) tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size + LR_OFFSET)); #ifdef CONFIG_USE_GUEST_BASE - tcg_out_movi (s, TCG_TYPE_I32, TCG_GUEST_BASE_REG, GUEST_BASE); + if (GUEST_BASE) { + tcg_out_movi (s, TCG_TYPE_I32, TCG_GUEST_BASE_REG, GUEST_BASE); + 
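The nest of moves in the PPC 64-bit-load slow path solves a small parallel-move problem: the helper returns the value in r3:r4, and the halves must reach an arbitrary destination pair without overwriting a source before it is read, with r0 as scratch for the full-swap case. A C model over an array of register values, mirroring the three cases:

    static void move_result_pair(unsigned r[], int data_reg2, int data_reg)
    {
        /* result arrives with the high half in r[3], low half in r[4] */
        if (data_reg == 3) {
            if (data_reg2 == 4) {       /* destination pair exactly swapped */
                r[0] = r[4];            /* r0 as scratch */
                r[4] = r[3];
                r[3] = r[0];
            } else {
                r[data_reg2] = r[3];    /* rescue r3 first... */
                r[3] = r[4];            /* ...then it is safe to clobber */
            }
        } else {
            if (data_reg  != 4) r[data_reg]  = r[4];
            if (data_reg2 != 3) r[data_reg2] = r[3];
        }
    }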
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } #endif tcg_out32 (s, MTSPR | RS (3) | CTR); @@ -1076,7 +1078,8 @@ static void tcg_out_bc (TCGContext *s, int bc, int label_index) static void tcg_out_cr7eq_from_cond (TCGContext *s, const TCGArg *args, const int *const_args) { - int cond = args[4], op; + TCGCond cond = args[4]; + int op; struct { int bit1; int bit2; int cond2; } bits[] = { [TCG_COND_LT ] = { CR_LT, CR_LT, TCG_COND_LT }, [TCG_COND_LE ] = { CR_LT, CR_GT, TCG_COND_LT }, @@ -1106,9 +1109,9 @@ static void tcg_out_cr7eq_from_cond (TCGContext *s, const TCGArg *args, case TCG_COND_GEU: op = (b->bit1 != b->bit2) ? CRANDC : CRAND; tcg_out_cmp (s, b->cond2, args[1], args[3], const_args[3], 5); - tcg_out_cmp (s, TCG_COND_EQ, args[1], args[3], const_args[3], 6); - tcg_out_cmp (s, cond, args[0], args[2], const_args[2], 7); - tcg_out32 (s, op | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, b->bit2)); + tcg_out_cmp (s, tcg_unsigned_cond (cond), args[0], args[2], + const_args[2], 7); + tcg_out32 (s, op | BT (7, CR_EQ) | BA (5, CR_EQ) | BB (7, b->bit2)); tcg_out32 (s, CROR | BT (7, CR_EQ) | BA (5, b->bit1) | BB (7, CR_EQ)); break; default: @@ -1116,7 +1119,7 @@ static void tcg_out_cr7eq_from_cond (TCGContext *s, const TCGArg *args, } } -static void tcg_out_setcond (TCGContext *s, int cond, TCGArg arg0, +static void tcg_out_setcond (TCGContext *s, TCGCond cond, TCGArg arg0, TCGArg arg1, TCGArg arg2, int const_arg2) { int crop, sh, arg; @@ -1240,7 +1243,7 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args, ); } -static void tcg_out_brcond (TCGContext *s, int cond, +static void tcg_out_brcond (TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) { @@ -1288,7 +1291,7 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) flush_icache_range(jmp_addr, jmp_addr + patch_size); } -static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { switch (opc) { @@ -1478,6 +1481,15 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_orc_i32: tcg_out32 (s, ORC | SAB (args[1], args[0], args[2])); break; + case INDEX_op_eqv_i32: + tcg_out32 (s, EQV | SAB (args[1], args[0], args[2])); + break; + case INDEX_op_nand_i32: + tcg_out32 (s, NAND | SAB (args[1], args[0], args[2])); + break; + case INDEX_op_nor_i32: + tcg_out32 (s, NOR | SAB (args[1], args[0], args[2])); + break; case INDEX_op_mul_i32: if (const_args[2]) { @@ -1517,7 +1529,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, if (args[0] == args[2] || args[0] == args[3]) { tcg_out32 (s, MULLW | TAB (0, args[2], args[3])); tcg_out32 (s, MULHWU | TAB (args[1], args[2], args[3])); - tcg_out_mov (s, args[0], 0); + tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); } else { tcg_out32 (s, MULLW | TAB (args[0], args[2], args[3])); @@ -1575,7 +1587,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_rotr_i32: if (const_args[2]) { if (!args[2]) { - tcg_out_mov (s, args[0], args[1]); + tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]); } else { tcg_out32 (s, RLWINM @@ -1588,7 +1600,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, } } else { - tcg_out32 (s, ADDI | RT (0) | RA (args[2]) | 0xffe0); + tcg_out32 (s, SUBFIC | RT (0) | RA (args[2]) | 32); tcg_out32 (s, RLWNM | RA (args[0]) | RS (args[1]) @@ -1603,7 +1615,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, if 
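The SUBFIC change fixes the variable right-rotate: PPC only rotates left, and rotr(x, n) equals rotl(x, 32 - n), so the amount fed to RLWNM must be 32 - n, which SUBFIC (immediate minus register) supplies. The replaced ADDI form computed n - 32, and since the rotate amount is taken mod 32 that reduces back to n, a rotate in the wrong direction. The identity in C:

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        n &= 31;
        return n ? (x << n) | (x >> (32 - n)) : x;
    }

    static uint32_t rotr32(uint32_t x, unsigned n)
    {
        return rotl32(x, 32 - (n & 31));   /* the SUBFIC result */
    }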
(args[0] == args[3] || args[0] == args[5]) { tcg_out32 (s, ADDC | TAB (0, args[2], args[4])); tcg_out32 (s, ADDE | TAB (args[1], args[3], args[5])); - tcg_out_mov (s, args[0], 0); + tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); } else { tcg_out32 (s, ADDC | TAB (args[0], args[2], args[4])); @@ -1614,7 +1626,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, if (args[0] == args[3] || args[0] == args[5]) { tcg_out32 (s, SUBFC | TAB (0, args[4], args[2])); tcg_out32 (s, SUBFE | TAB (args[1], args[5], args[3])); - tcg_out_mov (s, args[0], 0); + tcg_out_mov (s, TCG_TYPE_I32, args[0], 0); } else { tcg_out32 (s, SUBFC | TAB (args[0], args[4], args[2])); @@ -1641,7 +1653,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, break; case INDEX_op_not_i32: - tcg_out32 (s, NOR | SAB (args[1], args[0], args[0])); + tcg_out32 (s, NOR | SAB (args[1], args[0], args[1])); break; case INDEX_op_qemu_ld8u: @@ -1656,7 +1668,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_qemu_ld16s: tcg_out_qemu_ld(s, args, 1 | 4); break; - case INDEX_op_qemu_ld32u: + case INDEX_op_qemu_ld32: tcg_out_qemu_ld(s, args, 2); break; case INDEX_op_qemu_ld64: @@ -1707,6 +1719,77 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, tcg_out_setcond2 (s, args, const_args); break; + case INDEX_op_bswap16_i32: + /* Stolen from gcc's builtin_bswap16 */ + + /* a1 = abcd */ + + /* r0 = (a1 << 8) & 0xff00 # 00d0 */ + tcg_out32 (s, RLWINM + | RA (0) + | RS (args[1]) + | SH (8) + | MB (16) + | ME (23) + ); + + /* a0 = rotate_left (a1, 24) & 0xff # 000c */ + tcg_out32 (s, RLWINM + | RA (args[0]) + | RS (args[1]) + | SH (24) + | MB (24) + | ME (31) + ); + + /* a0 = a0 | r0 # 00dc */ + tcg_out32 (s, OR | SAB (0, args[0], args[0])); + break; + + case INDEX_op_bswap32_i32: + /* Stolen from gcc's builtin_bswap32 */ + { + int a0 = args[0]; + + /* a1 = args[1] # abcd */ + + if (a0 == args[1]) { + a0 = 0; + } + + /* a0 = rotate_left (a1, 8) # bcda */ + tcg_out32 (s, RLWINM + | RA (a0) + | RS (args[1]) + | SH (8) + | MB (0) + | ME (31) + ); + + /* a0 = (a0 & ~0xff000000) | ((a1 << 24) & 0xff000000) # dcda */ + tcg_out32 (s, RLWIMI + | RA (a0) + | RS (args[1]) + | SH (24) + | MB (0) + | ME (7) + ); + + /* a0 = (a0 & ~0x0000ff00) | ((a1 << 24) & 0x0000ff00) # dcba */ + tcg_out32 (s, RLWIMI + | RA (a0) + | RS (args[1]) + | SH (24) + | MB (16) + | ME (23) + ); + + if (!a0) { + tcg_out_mov (s, TCG_TYPE_I32, args[0], a0); + } + } + break; + default: tcg_dump_ops (s, stderr); tcg_abort (); @@ -1761,16 +1844,22 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_andc_i32, { "r", "r", "r" } }, { INDEX_op_orc_i32, { "r", "r", "r" } }, + { INDEX_op_eqv_i32, { "r", "r", "r" } }, + { INDEX_op_nand_i32, { "r", "r", "r" } }, + { INDEX_op_nor_i32, { "r", "r", "r" } }, { INDEX_op_setcond_i32, { "r", "r", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + #if TARGET_LONG_BITS == 32 { INDEX_op_qemu_ld8u, { "r", "L" } }, { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld64, { "r", "r", "L" } }, { INDEX_op_qemu_st8, { "K", "K" } }, @@ -1782,7 +1871,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, { INDEX_op_qemu_ld16s, { "r", 
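The borrowed bswap32 sequence builds the result with one rotate plus two rotate-and-insert-under-mask (RLWIMI) operations, detouring through r0 when source and destination alias. Note that the committed comments write a1 << 24 where the instruction actually rotates; the C below uses the rotate, which is what makes the 0x0000ff00 step land on byte b. Same three steps, keeping the abcd byte naming:

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));   /* n in 1..31 here */
    }

    static uint32_t ppc_bswap32(uint32_t a1)                       /* abcd */
    {
        uint32_t a0 = rotl32(a1, 8);                               /* bcda */
        a0 = (a0 & ~0xff000000u) | (rotl32(a1, 24) & 0xff000000u); /* dcda */
        a0 = (a0 & ~0x0000ff00u) | (rotl32(a1, 24) & 0x0000ff00u); /* dcba */
        return a0;
    }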
"L", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } }, { INDEX_op_qemu_st8, { "K", "K", "K" } }, @@ -1799,7 +1888,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { -1 }, }; -void tcg_target_init(TCGContext *s) +static void tcg_target_init(TCGContext *s) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, @@ -1828,9 +1917,6 @@ void tcg_target_init(TCGContext *s) #ifdef _CALL_SYSV tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); #endif -#ifdef CONFIG_USE_GUEST_BASE - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); -#endif tcg_add_target_add_op_defs(ppc_op_defs); } diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 0c71a11..a1f8599 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -65,7 +65,7 @@ enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_R1 #define TCG_TARGET_STACK_ALIGN 16 -#if defined _CALL_DARWIN +#if defined _CALL_DARWIN || defined __APPLE__ #define TCG_TARGET_CALL_STACK_OFFSET 24 #elif defined _CALL_AIX #define TCG_TARGET_CALL_STACK_OFFSET 52 @@ -83,15 +83,16 @@ enum { #define TCG_TARGET_HAS_ext16s_i32 #define TCG_TARGET_HAS_ext8u_i32 #define TCG_TARGET_HAS_ext16u_i32 -/* #define TCG_TARGET_HAS_bswap16_i32 */ -/* #define TCG_TARGET_HAS_bswap32_i32 */ +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 #define TCG_TARGET_HAS_not_i32 #define TCG_TARGET_HAS_neg_i32 #define TCG_TARGET_HAS_andc_i32 #define TCG_TARGET_HAS_orc_i32 +#define TCG_TARGET_HAS_eqv_i32 +#define TCG_TARGET_HAS_nand_i32 +#define TCG_TARGET_HAS_nor_i32 #define TCG_AREG0 TCG_REG_R27 -#define TCG_AREG1 TCG_REG_R24 -#define TCG_AREG2 TCG_REG_R25 #define TCG_TARGET_HAS_GUEST_BASE diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index fed179c..ebbee34 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -28,12 +28,6 @@ static uint8_t *tb_ret_addr; #define FAST_PATH -#if TARGET_PHYS_ADDR_BITS == 32 -#define LD_ADDEND LWZ -#else -#define LD_ADDEND LD -#endif - #if TARGET_LONG_BITS == 32 #define LD_ADDR LWZU #define CMP_L 0 @@ -56,7 +50,7 @@ static uint8_t *tb_ret_addr; static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "r0", "r1", - "rp", + "r2", "r3", "r4", "r5", @@ -441,7 +435,7 @@ static const uint32_t tcg_to_bc[10] = { [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, }; -static void tcg_out_mov (TCGContext *s, int ret, int arg) +static void tcg_out_mov (TCGContext *s, TCGType type, int ret, int arg) { tcg_out32 (s, OR | SAB (arg, ret, arg)); } @@ -650,7 +644,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) #endif /* slow path */ - tcg_out_mov (s, 3, addr_reg); + tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg); tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index); tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1); @@ -670,7 +664,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) case 2: case 3: if (data_reg != 3) - tcg_out_mov (s, data_reg, 3); + tcg_out_mov (s, TCG_TYPE_I64, data_reg, 3); break; } label2_ptr = s->code_ptr; @@ -682,7 +676,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) #endif /* r0 now contains &env->tlb_table[mem_index][index].addr_read */ - tcg_out32 (s, (LD_ADDEND + tcg_out32 (s, (LD | RT (r0) | RA (r0) | (offsetof (CPUTLBEntry, addend) @@ -796,7 +790,7 @@ static void tcg_out_qemu_st (TCGContext *s, const 
TCGArg *args, int opc) #endif /* slow path */ - tcg_out_mov (s, 3, addr_reg); + tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg); tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc))); tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index); @@ -810,7 +804,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr); #endif - tcg_out32 (s, (LD_ADDEND + tcg_out32 (s, (LD | RT (r0) | RA (r0) | (offsetof (CPUTLBEntry, addend) @@ -866,7 +860,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) #endif } -void tcg_target_qemu_prologue (TCGContext *s) +static void tcg_target_qemu_prologue (TCGContext *s) { int i, frame_size; #ifndef __APPLE__ @@ -905,7 +899,10 @@ void tcg_target_qemu_prologue (TCGContext *s) tcg_out32 (s, STD | RS (0) | RA (1) | (frame_size + 16)); #ifdef CONFIG_USE_GUEST_BASE - tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); + if (GUEST_BASE) { + tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } #endif tcg_out32 (s, MTSPR | RS (3) | CTR); @@ -1049,8 +1046,9 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, } -static void tcg_out_setcond (TCGContext *s, TCGType type, int cond, TCGArg arg0, - TCGArg arg1, TCGArg arg2, int const_arg2) +static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) { int crop, sh, arg; @@ -1180,7 +1178,7 @@ static void tcg_out_bc (TCGContext *s, int bc, int label_index) } } -static void tcg_out_brcond (TCGContext *s, int cond, +static void tcg_out_brcond (TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index, int arch64) { @@ -1199,7 +1197,7 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr) flush_icache_range (jmp_addr, jmp_addr + patch_size); } -static void tcg_out_op (TCGContext *s, int opc, const TCGArg *args, +static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { int c; @@ -1517,6 +1515,7 @@ static void tcg_out_op (TCGContext *s, int opc, const TCGArg *args, case INDEX_op_qemu_ld16s: tcg_out_qemu_ld (s, args, 1 | 4); break; + case INDEX_op_qemu_ld32: case INDEX_op_qemu_ld32u: tcg_out_qemu_ld (s, args, 2); break; @@ -1645,6 +1644,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld32u, { "r", "L" } }, { INDEX_op_qemu_ld32s, { "r", "L" } }, { INDEX_op_qemu_ld64, { "r", "L" } }, @@ -1666,7 +1666,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { -1 }, }; -void tcg_target_init (TCGContext *s) +static void tcg_target_init (TCGContext *s) { tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); @@ -1695,9 +1695,5 @@ void tcg_target_init (TCGContext *s) #endif tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13); -#ifdef CONFIG_USE_GUEST_BASE - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); -#endif - tcg_add_target_add_op_defs (ppc_op_defs); } diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index f5de642..8a6db11 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -80,6 +80,9 @@ enum { #define TCG_TARGET_HAS_neg_i32 /* #define TCG_TARGET_HAS_andc_i32 */ /* #define 
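The ppc64 hunk above (LD_ADDEND becoming a plain LD) and the sparc hunk below make the same correction: the TLB addend is a guest-to-host delta, a host-pointer-sized quantity, so its load width follows the host word size and not TARGET_PHYS_ADDR_BITS. The invariant, stated as a type:

    #include <stdint.h>

    typedef struct {
        uint64_t  addr_read;
        uintptr_t addend;   /* host word: LDX on 64-bit hosts, LDUW on 32-bit */
    } TLBEntrySketch;       /* sketch only; field set trimmed */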
TCG_TARGET_HAS_orc_i32 */ +/* #define TCG_TARGET_HAS_eqv_i32 */ +/* #define TCG_TARGET_HAS_nand_i32 */ +/* #define TCG_TARGET_HAS_nor_i32 */ #define TCG_TARGET_HAS_div_i64 /* #define TCG_TARGET_HAS_rot_i64 */ @@ -96,9 +99,11 @@ enum { #define TCG_TARGET_HAS_neg_i64 /* #define TCG_TARGET_HAS_andc_i64 */ /* #define TCG_TARGET_HAS_orc_i64 */ +/* #define TCG_TARGET_HAS_eqv_i64 */ +/* #define TCG_TARGET_HAS_nand_i64 */ +/* #define TCG_TARGET_HAS_nor_i64 */ #define TCG_AREG0 TCG_REG_R27 -#define TCG_AREG1 TCG_REG_R24 -#define TCG_AREG2 TCG_REG_R25 #define TCG_TARGET_HAS_GUEST_BASE +#define TCG_TARGET_EXTEND_ARGS 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index d4ddaa7..5f1353a 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -304,7 +304,7 @@ static void tcg_out_arithc(TCGContext *s, int rd, int rs1, | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); } -static inline void tcg_out_mov(TCGContext *s, int ret, int arg) +static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) { tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); } @@ -520,7 +520,7 @@ static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const) tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); } -static void tcg_out_brcond_i32(TCGContext *s, int cond, +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) { @@ -530,7 +530,7 @@ static void tcg_out_brcond_i32(TCGContext *s, int cond, } #if TCG_TARGET_REG_BITS == 64 -static void tcg_out_brcond_i64(TCGContext *s, int cond, +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int label_index) { @@ -539,7 +539,7 @@ static void tcg_out_brcond_i64(TCGContext *s, int cond, tcg_out_nop(s); } #else -static void tcg_out_brcond2_i32(TCGContext *s, int cond, +static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond, TCGArg al, TCGArg ah, TCGArg bl, int blconst, TCGArg bh, int bhconst, int label_dest) @@ -587,7 +587,7 @@ static void tcg_out_brcond2_i32(TCGContext *s, int cond, } #endif -static void tcg_out_setcond_i32(TCGContext *s, int cond, TCGArg ret, +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) { TCGArg t; @@ -643,7 +643,7 @@ static void tcg_out_setcond_i32(TCGContext *s, int cond, TCGArg ret, } #if TCG_TARGET_REG_BITS == 64 -static void tcg_out_setcond_i64(TCGContext *s, int cond, TCGArg ret, +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1, TCGArg c2, int c2const) { tcg_out_cmp(s, c1, c2, c2const); @@ -653,7 +653,7 @@ static void tcg_out_setcond_i64(TCGContext *s, int cond, TCGArg ret, | MOVCC_XCC | INSN_IMM11(1)); } #else -static void tcg_out_setcond2_i32(TCGContext *s, int cond, TCGArg ret, +static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg al, TCGArg ah, TCGArg bl, int blconst, TCGArg bh, int bhconst) @@ -691,7 +691,7 @@ static void tcg_out_setcond2_i32(TCGContext *s, int cond, TCGArg ret, #endif /* Generate global QEMU prologue and epilogue code */ -void tcg_target_qemu_prologue(TCGContext *s) +static void tcg_target_qemu_prologue(TCGContext *s) { tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | INSN_IMM13(-TCG_TARGET_STACK_MINFRAME)); @@ -725,11 +725,13 @@ static const void * const qemu_st_helpers[4] = { #define TARGET_LD_OP LDX #endif -#if TARGET_PHYS_ADDR_BITS == 32 +#if defined(CONFIG_SOFTMMU) +#if HOST_LONG_BITS == 32 #define 
TARGET_ADDEND_LD_OP LDUW #else #define TARGET_ADDEND_LD_OP LDX #endif +#endif #ifdef __arch64__ #define HOST_LD_OP LDX @@ -793,7 +795,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out32(s, 0); /* mov (delay slot) */ - tcg_out_mov(s, arg0, addr_reg); + tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg); /* mov */ tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index); @@ -843,7 +845,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, case 3: default: /* mov */ - tcg_out_mov(s, data_reg, arg0); + tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0); break; } @@ -1005,10 +1007,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out32(s, 0); /* mov (delay slot) */ - tcg_out_mov(s, arg0, addr_reg); + tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg); /* mov */ - tcg_out_mov(s, arg1, data_reg); + tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg); /* mov */ tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); @@ -1111,7 +1113,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #endif } -static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { int c; @@ -1316,7 +1318,10 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, case INDEX_op_qemu_ld16s: tcg_out_qemu_ld(s, args, 1 | 4); break; + case INDEX_op_qemu_ld32: +#if TCG_TARGET_REG_BITS == 64 case INDEX_op_qemu_ld32u: +#endif tcg_out_qemu_ld(s, args, 2); break; #if TCG_TARGET_REG_BITS == 64 @@ -1472,8 +1477,9 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, #if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld32u, { "r", "L" } }, { INDEX_op_qemu_ld32s, { "r", "L" } }, #endif @@ -1527,7 +1533,7 @@ static const TCGTargetOpDef sparc_op_defs[] = { { -1 }, }; -void tcg_target_init(TCGContext *s) +static void tcg_target_init(TCGContext *s) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index dbc574d..df0785e 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -87,6 +87,10 @@ enum { #define TCG_TARGET_STACK_ALIGN 8 #endif +#ifdef __arch64__ +#define TCG_TARGET_EXTEND_ARGS 1 +#endif + /* optional instructions */ #define TCG_TARGET_HAS_div_i32 // #define TCG_TARGET_HAS_rot_i32 @@ -100,6 +104,9 @@ enum { #define TCG_TARGET_HAS_not_i32 #define TCG_TARGET_HAS_andc_i32 #define TCG_TARGET_HAS_orc_i32 +// #define TCG_TARGET_HAS_eqv_i32 +// #define TCG_TARGET_HAS_nand_i32 +// #define TCG_TARGET_HAS_nor_i32 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 @@ -117,21 +124,18 @@ enum { #define TCG_TARGET_HAS_not_i64 #define TCG_TARGET_HAS_andc_i64 #define TCG_TARGET_HAS_orc_i64 +// #define TCG_TARGET_HAS_eqv_i64 +// #define TCG_TARGET_HAS_nand_i64 +// #define TCG_TARGET_HAS_nor_i64 #endif -/* Note: must be synced with dyngen-exec.h and Makefile.target */ +/* Note: must be synced with dyngen-exec.h */ #ifdef CONFIG_SOLARIS #define TCG_AREG0 TCG_REG_G2 -#define TCG_AREG1 TCG_REG_G3 -#define TCG_AREG2 TCG_REG_G4 #elif defined(__sparc_v9__) #define TCG_AREG0 TCG_REG_G5 -#define TCG_AREG1 TCG_REG_G6 -#define TCG_AREG2 TCG_REG_G7 #else #define TCG_AREG0 TCG_REG_G6 -#define TCG_AREG1 TCG_REG_G1 -#define TCG_AREG2 TCG_REG_G2 #endif static inline void 
flush_icache_range(unsigned long start, unsigned long stop) diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 6ae1760..207a89f 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -25,60 +25,60 @@ int gen_new_label(void); -static inline void tcg_gen_op1_i32(int opc, TCGv_i32 arg1) +static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(arg1); } -static inline void tcg_gen_op1_i64(int opc, TCGv_i64 arg1) +static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(arg1); } -static inline void tcg_gen_op1i(int opc, TCGArg arg1) +static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = arg1; } -static inline void tcg_gen_op2_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2) +static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(arg1); *gen_opparam_ptr++ = GET_TCGV_I32(arg2); } -static inline void tcg_gen_op2_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2) +static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(arg1); *gen_opparam_ptr++ = GET_TCGV_I64(arg2); } -static inline void tcg_gen_op2i_i32(int opc, TCGv_i32 arg1, TCGArg arg2) +static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(arg1); *gen_opparam_ptr++ = arg2; } -static inline void tcg_gen_op2i_i64(int opc, TCGv_i64 arg1, TCGArg arg2) +static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(arg1); *gen_opparam_ptr++ = arg2; } -static inline void tcg_gen_op2ii(int opc, TCGArg arg1, TCGArg arg2) +static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = arg1; *gen_opparam_ptr++ = arg2; } -static inline void tcg_gen_op3_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3) { *gen_opc_ptr++ = opc; @@ -87,7 +87,7 @@ static inline void tcg_gen_op3_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = GET_TCGV_I32(arg3); } -static inline void tcg_gen_op3_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3) { *gen_opc_ptr++ = opc; @@ -96,8 +96,8 @@ static inline void tcg_gen_op3_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = GET_TCGV_I64(arg3); } -static inline void tcg_gen_op3i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, - TCGArg arg3) +static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGArg arg3) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(arg1); @@ -105,8 +105,8 @@ static inline void tcg_gen_op3i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg3; } -static inline void tcg_gen_op3i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, - TCGArg arg3) +static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGArg arg3) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(arg1); @@ -114,8 +114,8 @@ static inline void tcg_gen_op3i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = arg3; } -static inline void tcg_gen_ldst_op_i32(int opc, TCGv_i32 val, TCGv_ptr base, - TCGArg offset) +static inline void 
tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(val); @@ -123,8 +123,8 @@ static inline void tcg_gen_ldst_op_i32(int opc, TCGv_i32 val, TCGv_ptr base, *gen_opparam_ptr++ = offset; } -static inline void tcg_gen_ldst_op_i64(int opc, TCGv_i64 val, TCGv_ptr base, - TCGArg offset) +static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(val); @@ -132,8 +132,8 @@ static inline void tcg_gen_ldst_op_i64(int opc, TCGv_i64 val, TCGv_ptr base, *gen_opparam_ptr++ = offset; } -static inline void tcg_gen_qemu_ldst_op_i64_i32(int opc, TCGv_i64 val, TCGv_i32 addr, - TCGArg mem_index) +static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val, + TCGv_i32 addr, TCGArg mem_index) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(val); @@ -141,8 +141,8 @@ static inline void tcg_gen_qemu_ldst_op_i64_i32(int opc, TCGv_i64 val, TCGv_i32 *gen_opparam_ptr++ = mem_index; } -static inline void tcg_gen_qemu_ldst_op_i64_i64(int opc, TCGv_i64 val, TCGv_i64 addr, - TCGArg mem_index) +static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_i64 addr, TCGArg mem_index) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(val); @@ -150,7 +150,7 @@ static inline void tcg_gen_qemu_ldst_op_i64_i64(int opc, TCGv_i64 val, TCGv_i64 *gen_opparam_ptr++ = mem_index; } -static inline void tcg_gen_op4_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4) { *gen_opc_ptr++ = opc; @@ -160,7 +160,7 @@ static inline void tcg_gen_op4_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = GET_TCGV_I32(arg4); } -static inline void tcg_gen_op4_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGv_i64 arg4) { *gen_opc_ptr++ = opc; @@ -170,7 +170,7 @@ static inline void tcg_gen_op4_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = GET_TCGV_I64(arg4); } -static inline void tcg_gen_op4i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGArg arg4) { *gen_opc_ptr++ = opc; @@ -180,7 +180,7 @@ static inline void tcg_gen_op4i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg4; } -static inline void tcg_gen_op4i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGArg arg4) { *gen_opc_ptr++ = opc; @@ -190,7 +190,7 @@ static inline void tcg_gen_op4i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = arg4; } -static inline void tcg_gen_op4ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGArg arg3, TCGArg arg4) { *gen_opc_ptr++ = opc; @@ -200,7 +200,7 @@ static inline void tcg_gen_op4ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg4; } -static inline void tcg_gen_op4ii_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGArg arg3, TCGArg arg4) { *gen_opc_ptr++ = opc; @@ -210,7 +210,7 @@ static inline void tcg_gen_op4ii_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = arg4; } -static inline void 
tcg_gen_op5_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5) { *gen_opc_ptr++ = opc; @@ -221,7 +221,7 @@ static inline void tcg_gen_op5_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = GET_TCGV_I32(arg5); } -static inline void tcg_gen_op5_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5) { *gen_opc_ptr++ = opc; @@ -232,7 +232,7 @@ static inline void tcg_gen_op5_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = GET_TCGV_I64(arg5); } -static inline void tcg_gen_op5i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5) { *gen_opc_ptr++ = opc; @@ -243,7 +243,7 @@ static inline void tcg_gen_op5i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg5; } -static inline void tcg_gen_op5i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5) { *gen_opc_ptr++ = opc; @@ -254,7 +254,31 @@ static inline void tcg_gen_op5i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = arg5; } -static inline void tcg_gen_op6_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGv_i32 arg3, + TCGArg arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = arg4; + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGv_i64 arg3, + TCGArg arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = arg4; + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5, TCGv_i32 arg6) { @@ -267,7 +291,7 @@ static inline void tcg_gen_op6_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = GET_TCGV_I32(arg6); } -static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5, TCGv_i64 arg6) { @@ -280,7 +304,7 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = GET_TCGV_I64(arg6); } -static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, +static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5, TCGArg arg6) { @@ -293,7 +317,7 @@ static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg6; } -static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, +static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5, TCGArg arg6) { @@ -306,9 +330,9 @@ static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, *gen_opparam_ptr++ = arg6; } -static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, 
TCGv_i32 arg2, - TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5, - TCGArg arg6) +static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGv_i32 arg3, + TCGv_i32 arg4, TCGArg arg5, TCGArg arg6) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I32(arg1); @@ -319,9 +343,9 @@ static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2, *gen_opparam_ptr++ = arg6; } -static inline void tcg_gen_op6ii_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2, - TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5, - TCGArg arg6) +static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGv_i64 arg3, + TCGv_i64 arg4, TCGArg arg5, TCGArg arg6) { *gen_opc_ptr++ = opc; *gen_opparam_ptr++ = GET_TCGV_I64(arg1); @@ -353,6 +377,13 @@ static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); } +/* A version of dh_sizemask from def-helper.h that doesn't rely on + preprocessor magic. */ +static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed) +{ + return (is_64bit << n*2) | (is_signed << (n*2 + 1)); +} + /* helper calls */ static inline void tcg_gen_helperN(void *func, int flags, int sizemask, TCGArg ret, int nargs, TCGArg *args) @@ -364,8 +395,25 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask, tcg_temp_free_ptr(fn); } -/* FIXME: Should this be pure? */ -static inline void tcg_gen_helper64(void *func, TCGv_i64 ret, +/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently + reserved for helpers in tcg-runtime.c. These helpers are all const + and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST | + TCG_CALL_PURE. This may need to be adjusted if these functions + start to be used with other helpers. */ +static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, + TCGv_i32 a, TCGv_i32 b) +{ + TCGv_ptr fn; + TCGArg args[2]; + fn = tcg_const_ptr((tcg_target_long)func); + args[0] = GET_TCGV_I32(a); + args[1] = GET_TCGV_I32(b); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I32(ret), 2, args); + tcg_temp_free_ptr(fn); +} + +static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) { TCGv_ptr fn; @@ -373,7 +421,8 @@ static inline void tcg_gen_helper64(void *func, TCGv_i64 ret, fn = tcg_const_ptr((tcg_target_long)func); args[0] = GET_TCGV_I64(a); args[1] = GET_TCGV_I64(b); - tcg_gen_callN(&tcg_ctx, fn, 0, 7, GET_TCGV_I64(ret), 2, args); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I64(ret), 2, args); tcg_temp_free_ptr(fn); } @@ -575,28 +624,28 @@ static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) } } -static inline void tcg_gen_brcond_i32(int cond, TCGv_i32 arg1, TCGv_i32 arg2, - int label_index) +static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, + TCGv_i32 arg2, int label_index) { tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index); } -static inline void tcg_gen_brcondi_i32(int cond, TCGv_i32 arg1, int32_t arg2, - int label_index) +static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, + int32_t arg2, int label_index) { TCGv_i32 t0 = tcg_const_i32(arg2); tcg_gen_brcond_i32(cond, arg1, t0, label_index); tcg_temp_free_i32(t0); } -static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret, +static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); 
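The sizemask layout mirrors dh_sizemask() in def-helper.h: slot 0 describes the return value and slot n describes argument n, with bit 2n set for a 64-bit value and bit 2n+1 set for a signed one. A minimal standalone sketch of the encoding (helper and test values invented for illustration):

    /* Standalone model of tcg_gen_sizemask(); slot 0 is the return
       value, slots 1..n are the arguments. */
    #include <assert.h>

    static int sizemask_bit(int n, int is_64bit, int is_signed)
    {
        return (is_64bit << (n * 2)) | (is_signed << (n * 2 + 1));
    }

    int main(void)
    {
        /* int64_t helper(int64_t, int64_t): all slots 64-bit, signed. */
        int sizemask = 0;
        sizemask |= sizemask_bit(0, 1, 1);  /* return value */
        sizemask |= sizemask_bit(1, 1, 1);  /* first argument */
        sizemask |= sizemask_bit(2, 1, 1);  /* second argument */
        assert(sizemask == 0x3f);
        return 0;
    }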
} -static inline void tcg_gen_setcondi_i32(int cond, TCGv_i32 ret, TCGv_i32 arg1, - int32_t arg2) +static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, int32_t arg2) { TCGv_i32 t0 = tcg_const_i32(arg2); tcg_gen_setcond_i32(cond, ret, arg1, t0); @@ -635,7 +684,7 @@ static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); } -#else +#elif defined(TCG_TARGET_HAS_div2_i32) static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { TCGv_i32 t0; @@ -671,6 +720,50 @@ static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } +#else +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2); +} #endif #if TCG_TARGET_REG_BITS == 32 @@ -829,7 +922,13 @@ static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) specific code (x86) */ static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_shl_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -839,7 +938,13 @@ static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_shr_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. 
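The tcg_gen_div_i32() group above now selects between three tiers: a native division op (TCG_TARGET_HAS_div_i32), an x86-style two-word division (TCG_TARGET_HAS_div2_i32) that consumes the dividend as a hi:lo register pair after arg1 is sign- or zero-extended into a scratch register, and finally a plain helper call carrying the new sizemask. A standalone C model of the semantics the div2 op is expected to provide (function name invented):

    #include <stdint.h>

    /* What INDEX_op_div2_i32 computes: divide the 64-bit pair hi:lo
       by divisor, producing a 32-bit quotient and remainder. */
    static void div2_i32(int32_t *quot, int32_t *rem,
                         int32_t lo, int32_t hi, int32_t divisor)
    {
        int64_t dividend = ((int64_t)hi << 32) | (uint32_t)lo;
        *quot = (int32_t)(dividend / divisor);
        *rem  = (int32_t)(dividend % divisor);
    }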
*/ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -849,7 +954,13 @@ static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_sar_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) @@ -857,15 +968,15 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1); } -static inline void tcg_gen_brcond_i64(int cond, TCGv_i64 arg1, TCGv_i64 arg2, - int label_index) +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) { tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), cond, label_index); } -static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret, +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), @@ -897,22 +1008,46 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_div_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_rem_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_divu_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); } static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - tcg_gen_helper64(tcg_helper_remu_i64, ret, arg1, arg2); + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. 
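On 32-bit hosts the 64-bit brcond/setcond above lower to the brcond2_i32/setcond2_i32 ops over the low and high halves of each operand. The double-word comparison those ops must implement, as a standalone C model (two representative conditions shown):

    #include <stdint.h>

    /* TCG_COND_EQ over a 64-bit value held as two 32-bit halves. */
    static int cmp64_eq(uint32_t lo1, uint32_t hi1,
                        uint32_t lo2, uint32_t hi2)
    {
        return hi1 == hi2 && lo1 == lo2;
    }

    /* TCG_COND_LTU: the high halves decide unless they are equal. */
    static int cmp64_ltu(uint32_t lo1, uint32_t hi1,
                         uint32_t lo2, uint32_t hi2)
    {
        return hi1 < hi2 || (hi1 == hi2 && lo1 < lo2);
    }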
*/ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); } #else @@ -1098,13 +1233,13 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) } } -static inline void tcg_gen_brcond_i64(int cond, TCGv_i64 arg1, TCGv_i64 arg2, - int label_index) +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) { tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index); } -static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret, +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); @@ -1135,7 +1270,7 @@ static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); } -#else +#elif defined(TCG_TARGET_HAS_div2_i64) static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { TCGv_i64 t0; @@ -1171,6 +1306,50 @@ static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } +#else +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. 
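Typing the condition parameter as TCGCond rather than int means a stray integer at a tcg_gen_brcond*/setcond* call site is now a compile-time diagnostic instead of silent misbehaviour. A usage fragment, assuming the standard TCG_COND_* enumerators and an existing TCGv_i32 reg:

    /* Branch to label l when reg == 0; fall through otherwise. */
    int l = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_EQ, reg, 0, l);
    /* ... emit the fall-through path here ... */
    gen_set_label(l);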
*/ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); +} #endif #endif @@ -1205,16 +1384,16 @@ static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) tcg_temp_free_i64(t0); } } -static inline void tcg_gen_brcondi_i64(int cond, TCGv_i64 arg1, int64_t arg2, - int label_index) +static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, + int64_t arg2, int label_index) { TCGv_i64 t0 = tcg_const_i64(arg2); tcg_gen_brcond_i64(cond, arg1, t0, label_index); tcg_temp_free_i64(t0); } -static inline void tcg_gen_setcondi_i64(int cond, TCGv_i64 ret, TCGv_i64 arg1, - int64_t arg2) +static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, int64_t arg2) { TCGv_i64 t0 = tcg_const_i64(arg2); tcg_gen_setcond_i64(cond, ret, arg1, t0); @@ -1595,6 +1774,9 @@ static inline void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) { #ifdef TCG_TARGET_HAS_not_i64 tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); +#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); #else tcg_gen_xori_i64(ret, arg, -1); #endif @@ -1679,38 +1861,71 @@ static inline void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { +#ifdef TCG_TARGET_HAS_eqv_i32 + tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); +#else tcg_gen_xor_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); +#endif } static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { +#ifdef TCG_TARGET_HAS_eqv_i64 + tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else tcg_gen_xor_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); +#endif } static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { +#ifdef TCG_TARGET_HAS_nand_i32 + tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); +#else tcg_gen_and_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); +#endif } static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { +#ifdef TCG_TARGET_HAS_nand_i64 + tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else tcg_gen_and_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); +#endif } static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { +#ifdef TCG_TARGET_HAS_nor_i32 + tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); +#else tcg_gen_or_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); +#endif } static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { +#ifdef TCG_TARGET_HAS_nor_i64 + tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else tcg_gen_or_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); +#endif } static inline void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, 
TCGv_i32 arg2) @@ -1880,6 +2095,44 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) } } +static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, + TCGv_i32 arg2, unsigned int ofs, + unsigned int len) +{ +#ifdef TCG_TARGET_HAS_deposit_i32 + tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); +#else + uint32_t mask = (1u << len) - 1; + TCGv_i32 t1 = tcg_temp_new_i32 (); + + tcg_gen_andi_i32(t1, arg2, mask); + tcg_gen_shli_i32(t1, t1, ofs); + tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i32(ret, ret, t1); + + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, + TCGv_i64 arg2, unsigned int ofs, + unsigned int len) +{ +#ifdef TCG_TARGET_HAS_deposit_i64 + tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); +#else + uint64_t mask = (1ull << len) - 1; + TCGv_i64 t1 = tcg_temp_new_i64 (); + + tcg_gen_andi_i64(t1, arg2, mask); + tcg_gen_shli_i64(t1, t1, ofs); + tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); + tcg_gen_or_i64(ret, ret, t1); + + tcg_temp_free_i64(t1); +#endif +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. */ @@ -1981,9 +2234,9 @@ static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) { #if TARGET_LONG_BITS == 32 - tcg_gen_op3i_i32(INDEX_op_qemu_ld32u, ret, addr, mem_index); + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); #else - tcg_gen_op4i_i32(INDEX_op_qemu_ld32u, TCGV_LOW(ret), TCGV_LOW(addr), + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); #endif @@ -1992,9 +2245,9 @@ static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) { #if TARGET_LONG_BITS == 32 - tcg_gen_op3i_i32(INDEX_op_qemu_ld32u, ret, addr, mem_index); + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); #else - tcg_gen_op4i_i32(INDEX_op_qemu_ld32u, TCGV_LOW(ret), TCGV_LOW(addr), + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); #endif @@ -2078,12 +2331,20 @@ static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) { +#if TARGET_LONG_BITS == 32 + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index); +#endif } static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) { +#if TARGET_LONG_BITS == 32 + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index); +#endif } static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) @@ -2117,7 +2378,6 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #endif /* TCG_TARGET_REG_BITS != 32 */ #if TARGET_LONG_BITS == 64 -#define TCG_TYPE_TL TCG_TYPE_I64 #define tcg_gen_movi_tl tcg_gen_movi_i64 #define tcg_gen_mov_tl tcg_gen_mov_i64 #define tcg_gen_ld8u_tl tcg_gen_ld8u_i64 @@ -2186,10 +2446,10 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_rotli_tl tcg_gen_rotli_i64 #define tcg_gen_rotr_tl tcg_gen_rotr_i64 #define tcg_gen_rotri_tl 
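The new tcg_gen_deposit_i32/_i64 above insert the low len bits of arg2 into arg1 at bit offset ofs, falling back to the mask/shift/or sequence when the backend does not define TCG_TARGET_HAS_deposit_*. A standalone model with a worked value (assumes len < 32, as the generic expansion does):

    #include <assert.h>
    #include <stdint.h>

    /* Model of the generic expansion: insert the low `len` bits of
       `field` into `base` at bit offset `ofs`. */
    static uint32_t deposit32(uint32_t base, uint32_t field,
                              unsigned ofs, unsigned len)
    {
        uint32_t mask = (1u << len) - 1;
        return (base & ~(mask << ofs)) | ((field & mask) << ofs);
    }

    int main(void)
    {
        /* Replace bits [15:8] of 0xAABBCCDD with 0x12. */
        assert(deposit32(0xAABBCCDD, 0x12, 8, 8) == 0xAABB12DD);
        return 0;
    }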
tcg_gen_rotri_i64 +#define tcg_gen_deposit_tl tcg_gen_deposit_i64 #define tcg_const_tl tcg_const_i64 #define tcg_const_local_tl tcg_const_local_i64 #else -#define TCG_TYPE_TL TCG_TYPE_I32 #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 #define tcg_gen_ld8u_tl tcg_gen_ld8u_i32 @@ -2257,6 +2517,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_rotli_tl tcg_gen_rotli_i32 #define tcg_gen_rotr_tl tcg_gen_rotr_i32 #define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_gen_deposit_tl tcg_gen_deposit_i32 #define tcg_const_tl tcg_const_i32 #define tcg_const_local_tl tcg_const_local_i32 #endif diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 34cdba5..1b8a6e4 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -21,267 +21,295 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#ifndef DEF2 -#define DEF2(name, oargs, iargs, cargs, flags) DEF(name, oargs + iargs + cargs, 0) -#endif + +/* + * DEF(name, oargs, iargs, cargs, flags) + */ /* predefined ops */ -DEF2(end, 0, 0, 0, 0) /* must be kept first */ -DEF2(nop, 0, 0, 0, 0) -DEF2(nop1, 0, 0, 1, 0) -DEF2(nop2, 0, 0, 2, 0) -DEF2(nop3, 0, 0, 3, 0) -DEF2(nopn, 0, 0, 1, 0) /* variable number of parameters */ +DEF(end, 0, 0, 0, 0) /* must be kept first */ +DEF(nop, 0, 0, 0, 0) +DEF(nop1, 0, 0, 1, 0) +DEF(nop2, 0, 0, 2, 0) +DEF(nop3, 0, 0, 3, 0) +DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ -DEF2(discard, 1, 0, 0, 0) +DEF(discard, 1, 0, 0, 0) -DEF2(set_label, 0, 0, 1, 0) -DEF2(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ -DEF2(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) -DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(set_label, 0, 0, 1, 0) +DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) -DEF2(mov_i32, 1, 1, 0, 0) -DEF2(movi_i32, 1, 0, 1, 0) -DEF2(setcond_i32, 1, 2, 1, 0) +DEF(mov_i32, 1, 1, 0, 0) +DEF(movi_i32, 1, 0, 1, 0) +DEF(setcond_i32, 1, 2, 1, 0) /* load/store */ -DEF2(ld8u_i32, 1, 1, 1, 0) -DEF2(ld8s_i32, 1, 1, 1, 0) -DEF2(ld16u_i32, 1, 1, 1, 0) -DEF2(ld16s_i32, 1, 1, 1, 0) -DEF2(ld_i32, 1, 1, 1, 0) -DEF2(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF2(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF2(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(ld8u_i32, 1, 1, 1, 0) +DEF(ld8s_i32, 1, 1, 1, 0) +DEF(ld16u_i32, 1, 1, 1, 0) +DEF(ld16s_i32, 1, 1, 1, 0) +DEF(ld_i32, 1, 1, 1, 0) +DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) /* arith */ -DEF2(add_i32, 1, 2, 0, 0) -DEF2(sub_i32, 1, 2, 0, 0) -DEF2(mul_i32, 1, 2, 0, 0) +DEF(add_i32, 1, 2, 0, 0) +DEF(sub_i32, 1, 2, 0, 0) +DEF(mul_i32, 1, 2, 0, 0) #ifdef TCG_TARGET_HAS_div_i32 -DEF2(div_i32, 1, 2, 0, 0) -DEF2(divu_i32, 1, 2, 0, 0) -DEF2(rem_i32, 1, 2, 0, 0) -DEF2(remu_i32, 1, 2, 0, 0) -#else -DEF2(div2_i32, 2, 3, 0, 0) -DEF2(divu2_i32, 2, 3, 0, 0) -#endif -DEF2(and_i32, 1, 2, 0, 0) -DEF2(or_i32, 1, 2, 0, 0) -DEF2(xor_i32, 1, 2, 0, 0) +DEF(div_i32, 1, 2, 0, 0) +DEF(divu_i32, 1, 2, 0, 0) +DEF(rem_i32, 1, 2, 0, 0) +DEF(remu_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_div2_i32 +DEF(div2_i32, 2, 3, 0, 0) +DEF(divu2_i32, 2, 3, 0, 0) +#endif +DEF(and_i32, 1, 2, 0, 0) +DEF(or_i32, 1, 2, 0, 0) +DEF(xor_i32, 1, 2, 0, 0) /* shifts/rotates */ -DEF2(shl_i32, 1, 2, 0, 0) -DEF2(shr_i32, 1, 2, 0, 0) -DEF2(sar_i32, 
1, 2, 0, 0) +DEF(shl_i32, 1, 2, 0, 0) +DEF(shr_i32, 1, 2, 0, 0) +DEF(sar_i32, 1, 2, 0, 0) #ifdef TCG_TARGET_HAS_rot_i32 -DEF2(rotl_i32, 1, 2, 0, 0) -DEF2(rotr_i32, 1, 2, 0, 0) +DEF(rotl_i32, 1, 2, 0, 0) +DEF(rotr_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_deposit_i32 +DEF(deposit_i32, 1, 2, 2, 0) #endif -DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) #if TCG_TARGET_REG_BITS == 32 -DEF2(add2_i32, 2, 4, 0, 0) -DEF2(sub2_i32, 2, 4, 0, 0) -DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) -DEF2(mulu2_i32, 2, 2, 0, 0) -DEF2(setcond2_i32, 1, 4, 1, 0) +DEF(add2_i32, 2, 4, 0, 0) +DEF(sub2_i32, 2, 4, 0, 0) +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(mulu2_i32, 2, 2, 0, 0) +DEF(setcond2_i32, 1, 4, 1, 0) #endif #ifdef TCG_TARGET_HAS_ext8s_i32 -DEF2(ext8s_i32, 1, 1, 0, 0) +DEF(ext8s_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext16s_i32 -DEF2(ext16s_i32, 1, 1, 0, 0) +DEF(ext16s_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext8u_i32 -DEF2(ext8u_i32, 1, 1, 0, 0) +DEF(ext8u_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext16u_i32 -DEF2(ext16u_i32, 1, 1, 0, 0) +DEF(ext16u_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_bswap16_i32 -DEF2(bswap16_i32, 1, 1, 0, 0) +DEF(bswap16_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_bswap32_i32 -DEF2(bswap32_i32, 1, 1, 0, 0) +DEF(bswap32_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_not_i32 -DEF2(not_i32, 1, 1, 0, 0) +DEF(not_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_neg_i32 -DEF2(neg_i32, 1, 1, 0, 0) +DEF(neg_i32, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_andc_i32 -DEF2(andc_i32, 1, 2, 0, 0) +DEF(andc_i32, 1, 2, 0, 0) #endif #ifdef TCG_TARGET_HAS_orc_i32 -DEF2(orc_i32, 1, 2, 0, 0) +DEF(orc_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i32 +DEF(eqv_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i32 +DEF(nand_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i32 +DEF(nor_i32, 1, 2, 0, 0) #endif #if TCG_TARGET_REG_BITS == 64 -DEF2(mov_i64, 1, 1, 0, 0) -DEF2(movi_i64, 1, 0, 1, 0) -DEF2(setcond_i64, 1, 2, 1, 0) +DEF(mov_i64, 1, 1, 0, 0) +DEF(movi_i64, 1, 0, 1, 0) +DEF(setcond_i64, 1, 2, 1, 0) /* load/store */ -DEF2(ld8u_i64, 1, 1, 1, 0) -DEF2(ld8s_i64, 1, 1, 1, 0) -DEF2(ld16u_i64, 1, 1, 1, 0) -DEF2(ld16s_i64, 1, 1, 1, 0) -DEF2(ld32u_i64, 1, 1, 1, 0) -DEF2(ld32s_i64, 1, 1, 1, 0) -DEF2(ld_i64, 1, 1, 1, 0) -DEF2(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF2(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF2(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF2(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(ld8u_i64, 1, 1, 1, 0) +DEF(ld8s_i64, 1, 1, 1, 0) +DEF(ld16u_i64, 1, 1, 1, 0) +DEF(ld16s_i64, 1, 1, 1, 0) +DEF(ld32u_i64, 1, 1, 1, 0) +DEF(ld32s_i64, 1, 1, 1, 0) +DEF(ld_i64, 1, 1, 1, 0) +DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) /* arith */ -DEF2(add_i64, 1, 2, 0, 0) -DEF2(sub_i64, 1, 2, 0, 0) -DEF2(mul_i64, 1, 2, 0, 0) +DEF(add_i64, 1, 2, 0, 0) +DEF(sub_i64, 1, 2, 0, 0) +DEF(mul_i64, 1, 2, 0, 0) #ifdef TCG_TARGET_HAS_div_i64 -DEF2(div_i64, 1, 2, 0, 0) -DEF2(divu_i64, 1, 2, 0, 0) -DEF2(rem_i64, 1, 2, 0, 0) -DEF2(remu_i64, 1, 2, 0, 0) -#else -DEF2(div2_i64, 2, 3, 0, 0) -DEF2(divu2_i64, 2, 3, 0, 0) -#endif -DEF2(and_i64, 1, 2, 0, 0) -DEF2(or_i64, 1, 2, 0, 0) -DEF2(xor_i64, 1, 2, 0, 0) +DEF(div_i64, 1, 2, 0, 0) +DEF(divu_i64, 1, 2, 0, 0) +DEF(rem_i64, 1, 2, 0, 0) +DEF(remu_i64, 1, 2, 0, 0) +#endif +#ifdef 
TCG_TARGET_HAS_div2_i64 +DEF(div2_i64, 2, 3, 0, 0) +DEF(divu2_i64, 2, 3, 0, 0) +#endif +DEF(and_i64, 1, 2, 0, 0) +DEF(or_i64, 1, 2, 0, 0) +DEF(xor_i64, 1, 2, 0, 0) /* shifts/rotates */ -DEF2(shl_i64, 1, 2, 0, 0) -DEF2(shr_i64, 1, 2, 0, 0) -DEF2(sar_i64, 1, 2, 0, 0) +DEF(shl_i64, 1, 2, 0, 0) +DEF(shr_i64, 1, 2, 0, 0) +DEF(sar_i64, 1, 2, 0, 0) #ifdef TCG_TARGET_HAS_rot_i64 -DEF2(rotl_i64, 1, 2, 0, 0) -DEF2(rotr_i64, 1, 2, 0, 0) +DEF(rotl_i64, 1, 2, 0, 0) +DEF(rotr_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_deposit_i64 +DEF(deposit_i64, 1, 2, 2, 0) #endif -DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) #ifdef TCG_TARGET_HAS_ext8s_i64 -DEF2(ext8s_i64, 1, 1, 0, 0) +DEF(ext8s_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext16s_i64 -DEF2(ext16s_i64, 1, 1, 0, 0) +DEF(ext16s_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext32s_i64 -DEF2(ext32s_i64, 1, 1, 0, 0) +DEF(ext32s_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext8u_i64 -DEF2(ext8u_i64, 1, 1, 0, 0) +DEF(ext8u_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext16u_i64 -DEF2(ext16u_i64, 1, 1, 0, 0) +DEF(ext16u_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_ext32u_i64 -DEF2(ext32u_i64, 1, 1, 0, 0) +DEF(ext32u_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_bswap16_i64 -DEF2(bswap16_i64, 1, 1, 0, 0) +DEF(bswap16_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_bswap32_i64 -DEF2(bswap32_i64, 1, 1, 0, 0) +DEF(bswap32_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_bswap64_i64 -DEF2(bswap64_i64, 1, 1, 0, 0) +DEF(bswap64_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_not_i64 -DEF2(not_i64, 1, 1, 0, 0) +DEF(not_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_neg_i64 -DEF2(neg_i64, 1, 1, 0, 0) +DEF(neg_i64, 1, 1, 0, 0) #endif #ifdef TCG_TARGET_HAS_andc_i64 -DEF2(andc_i64, 1, 2, 0, 0) +DEF(andc_i64, 1, 2, 0, 0) #endif #ifdef TCG_TARGET_HAS_orc_i64 -DEF2(orc_i64, 1, 2, 0, 0) +DEF(orc_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i64 +DEF(eqv_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i64 +DEF(nand_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i64 +DEF(nor_i64, 1, 2, 0, 0) #endif #endif /* QEMU specific */ #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS -DEF2(debug_insn_start, 0, 0, 2, 0) +DEF(debug_insn_start, 0, 0, 2, 0) #else -DEF2(debug_insn_start, 0, 0, 1, 0) +DEF(debug_insn_start, 0, 0, 1, 0) #endif -DEF2(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) -DEF2(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) /* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op constants must be defined */ #if TCG_TARGET_REG_BITS == 32 #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | 
TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld32u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld32s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #if TARGET_LONG_BITS == 32 -DEF2(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #else -DEF2(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif #else /* TCG_TARGET_REG_BITS == 32 */ -DEF2(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16s, 1, 1, 1, 
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF2(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) #endif /* TCG_TARGET_REG_BITS != 32 */ -#undef DEF2 +#undef DEF diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index e750cc1..5615b13 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -2,6 +2,11 @@ #define TCG_RUNTIME_H /* tcg-runtime.c */ +int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2); +int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2); +uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2); +uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2); + int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); @@ -63,15 +63,15 @@ #error GUEST_BASE not supported on this host. #endif +static void tcg_target_init(TCGContext *s); +static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend); static TCGOpDef tcg_op_defs[] = { -#define DEF(s, n, copy_size) { #s, 0, 0, n, n, 0, copy_size }, -#define DEF2(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags, 0 }, +#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, #include "tcg-opc.h" #undef DEF -#undef DEF2 }; static TCGRegSet tcg_target_available_regs[2]; @@ -110,7 +110,7 @@ static inline void tcg_out32(TCGContext *s, uint32_t v) /* label relocation processing */ -void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, +static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, int label_index, long addend) { TCGLabel *l; @@ -250,7 +250,10 @@ void tcg_context_init(TCGContext *s) } tcg_target_init(s); +} +void tcg_prologue_init(TCGContext *s) +{ /* init global prologue and epilogue */ s->code_buf = code_gen_prologue; s->code_ptr = s->code_buf; @@ -457,6 +460,10 @@ static inline int tcg_temp_new_internal(TCGType type, int temp_local) s->nb_temps++; } } + +#if defined(CONFIG_DEBUG_TCG) + s->temps_in_use++; +#endif return idx; } @@ -482,6 +489,13 @@ static inline void tcg_temp_free_internal(int idx) TCGTemp *ts; int k; +#if defined(CONFIG_DEBUG_TCG) + s->temps_in_use--; + if (s->temps_in_use < 0) { + fprintf(stderr, "More temporaries freed than allocated!\n"); + } +#endif + assert(idx >= s->nb_globals && idx < s->nb_temps); ts = &s->temps[idx]; assert(ts->temp_allocated != 0); @@ -535,6 +549,27 @@ TCGv_i64 tcg_const_local_i64(int64_t val) return t0; } +#if defined(CONFIG_DEBUG_TCG) +void tcg_clear_temp_count(void) +{ + TCGContext *s = &tcg_ctx; + s->temps_in_use = 0; +} + +int tcg_check_temp_count(void) +{ + TCGContext *s = &tcg_ctx; + if 
(s->temps_in_use) { + /* Clear the count so that we don't give another + * warning immediately next time around. + */ + s->temps_in_use = 0; + return 1; + } + return 0; +} +#endif + void tcg_register_helper(void *func, const char *name) { TCGContext *s = &tcg_ctx; @@ -560,14 +595,36 @@ void tcg_register_helper(void *func, const char *name) void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, int sizemask, TCGArg ret, int nargs, TCGArg *args) { +#ifdef TCG_TARGET_I386 int call_type; +#endif int i; int real_args; int nb_rets; TCGArg *nparam; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + int is_signed = sizemask & (2 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = tcg_temp_new_i64(); + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + if (is_signed) { + tcg_gen_ext32s_i64(temp, orig); + } else { + tcg_gen_ext32u_i64(temp, orig); + } + args[i] = GET_TCGV_I64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ + *gen_opc_ptr++ = INDEX_op_call; nparam = gen_opparam_ptr++; +#ifdef TCG_TARGET_I386 call_type = (flags & TCG_CALL_TYPE_MASK); +#endif if (ret != TCG_CALL_DUMMY_ARG) { #if TCG_TARGET_REG_BITS < 64 if (sizemask & 1) { @@ -591,7 +648,8 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, real_args = 0; for (i = 0; i < nargs; i++) { #if TCG_TARGET_REG_BITS < 64 - if (sizemask & (2 << i)) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { #ifdef TCG_TARGET_I386 /* REGPARM case: if the third parameter is 64 bit, it is allocated on the stack */ @@ -607,7 +665,17 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, real_args++; } #endif -#ifdef TCG_TARGET_WORDS_BIGENDIAN + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. 
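When TCG_TARGET_EXTEND_ARGS is defined (as it now is for 64-bit sparc), tcg_gen_callN() widens every 32-bit argument into a fresh 64-bit temporary before emitting the call, choosing between ext32s and ext32u from the sizemask; the temporaries are freed again once the call op has been written. The per-argument decode, shown standalone (argument i lives in slot i+1, since slot 0 is the return value):

    /* Equivalent of the bit tests `sizemask & (1 << (i+1)*2)` and
       `sizemask & (2 << (i+1)*2)` used in tcg_gen_callN(). */
    static void decode_arg(int sizemask, int i,
                           int *is_64bit, int *is_signed)
    {
        *is_64bit  = (sizemask >> ((i + 1) * 2)) & 1;
        *is_signed = (sizemask >> ((i + 1) * 2 + 1)) & 1;
    }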
*/ +#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) *gen_opparam_ptr++ = args[i] + 1; *gen_opparam_ptr++ = args[i]; #else @@ -615,13 +683,13 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, *gen_opparam_ptr++ = args[i] + 1; #endif real_args += 2; - } else -#endif - { + continue; + } +#endif /* TCG_TARGET_REG_BITS < 64 */ + *gen_opparam_ptr++ = args[i]; real_args++; } - } *gen_opparam_ptr++ = GET_TCGV_PTR(func); *gen_opparam_ptr++ = flags; @@ -630,6 +698,16 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, /* total parameters, needed to go backward in the instruction stream */ *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = MAKE_TCGV_I64(args[i]); + tcg_temp_free_i64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ } #if TCG_TARGET_REG_BITS == 32 @@ -681,6 +759,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, } #endif + static void tcg_reg_alloc_start(TCGContext *s) { int i; @@ -793,7 +872,8 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile) const uint16_t *opc_ptr; const TCGArg *args; TCGArg arg; - int c, i, k, nb_oargs, nb_iargs, nb_cargs, first_insn; + TCGOpcode c; + int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn; const TCGOpDef *def; char buf[128]; @@ -980,16 +1060,16 @@ static void sort_constraints(TCGOpDef *def, int start, int n) void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) { - int op; + TCGOpcode op; TCGOpDef *def; const char *ct_str; int i, nb_args; for(;;) { - if (tdefs->op < 0) + if (tdefs->op == (TCGOpcode)-1) break; op = tdefs->op; - assert(op >= 0 && op < NB_OPS); + assert((unsigned)op < NB_OPS); def = &tcg_op_defs[op]; #if defined(CONFIG_DEBUG_TCG) /* Duplicate entry in op definitions? */ @@ -1056,15 +1136,27 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) } #if defined(CONFIG_DEBUG_TCG) + i = 0; for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) { if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) { /* Wrong entry in op definitions? */ - assert(!tcg_op_defs[op].used); + if (tcg_op_defs[op].used) { + fprintf(stderr, "Invalid op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } } else { /* Missing entry in op definitions? */ - assert(tcg_op_defs[op].used); + if (!tcg_op_defs[op].used) { + fprintf(stderr, "Missing op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } } } + if (i == 1) { + tcg_abort(); + } #endif } @@ -1116,7 +1208,8 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps) temporaries are removed. 
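With TCGTargetOpDef.op typed as TCGOpcode, tcg_add_target_add_op_defs() recognizes the end of a backend's constraint table by comparing against (TCGOpcode)-1 rather than testing for a negative int. A sketch of how such a table is terminated (the entries are invented; compare the real sparc_op_defs above):

    static const TCGTargetOpDef example_op_defs[] = {
        { INDEX_op_add_i32, { "r", "r", "r" } },
        { INDEX_op_mov_i32, { "r", "r" } },
        { -1 },  /* sentinel, matched as (TCGOpcode)-1 */
    };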
*/ static void tcg_liveness_analysis(TCGContext *s) { - int i, op_index, op, nb_args, nb_iargs, nb_oargs, arg, nb_ops; + int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; + TCGOpcode op; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps; @@ -1271,7 +1364,7 @@ static void tcg_liveness_analysis(TCGContext *s) } #else /* dummy liveness analysis */ -void tcg_liveness_analysis(TCGContext *s) +static void tcg_liveness_analysis(TCGContext *s) { int nb_ops; nb_ops = gen_opc_ptr - gen_opc_buf; @@ -1532,7 +1625,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); } if (ts->reg != reg) { - tcg_out_mov(s, reg, ts->reg); + tcg_out_mov(s, ots->type, reg, ts->reg); } } } else if (ts->val_type == TEMP_VAL_MEM) { @@ -1564,7 +1657,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, } static void tcg_reg_alloc_op(TCGContext *s, - const TCGOpDef *def, int opc, + const TCGOpDef *def, TCGOpcode opc, const TCGArg *args, unsigned int dead_iargs) { @@ -1637,7 +1730,7 @@ static void tcg_reg_alloc_op(TCGContext *s, /* allocate a new register matching the constraint and move the temporary register into it */ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); - tcg_out_mov(s, reg, ts->reg); + tcg_out_mov(s, ts->type, reg, ts->reg); } new_args[i] = reg; const_args[i] = 0; @@ -1719,7 +1812,7 @@ static void tcg_reg_alloc_op(TCGContext *s, ts = &s->temps[args[i]]; reg = new_args[i]; if (ts->fixed_reg && ts->reg != reg) { - tcg_out_mov(s, ts->reg, reg); + tcg_out_mov(s, ts->type, ts->reg, reg); } } } @@ -1731,7 +1824,7 @@ static void tcg_reg_alloc_op(TCGContext *s, #endif static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, - int opc, const TCGArg *args, + TCGOpcode opc, const TCGArg *args, unsigned int dead_iargs) { int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; @@ -1805,7 +1898,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, tcg_reg_free(s, reg); if (ts->val_type == TEMP_VAL_REG) { if (ts->reg != reg) { - tcg_out_mov(s, reg, ts->reg); + tcg_out_mov(s, ts->type, reg, ts->reg); } } else if (ts->val_type == TEMP_VAL_MEM) { tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); @@ -1834,7 +1927,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, reg = ts->reg; if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) { reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); - tcg_out_mov(s, reg, ts->reg); + tcg_out_mov(s, ts->type, reg, ts->reg); } func_arg = reg; tcg_regset_set_reg(allocated_regs, reg); @@ -1893,7 +1986,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, assert(s->reg_to_temp[reg] == -1); if (ts->fixed_reg) { if (ts->reg != reg) { - tcg_out_mov(s, ts->reg, reg); + tcg_out_mov(s, ts->type, ts->reg, reg); } } else { if (ts->val_type == TEMP_VAL_REG) @@ -1912,7 +2005,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, static int64_t tcg_table_op_count[NB_OPS]; -void dump_op_count(void) +static void dump_op_count(void) { int i; FILE *f; @@ -1928,7 +2021,8 @@ void dump_op_count(void) static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, long search_pc) { - int opc, op_index; + TCGOpcode opc; + int op_index; const TCGOpDef *def; unsigned int dead_iargs; const TCGArg *args; @@ -2109,8 +2203,7 @@ int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset) } #ifdef CONFIG_PROFILER -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +void 
tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) { TCGContext *s = &tcg_ctx; int64_t tot; @@ -2154,8 +2247,7 @@ void tcg_dump_info(FILE *f, dump_op_count(); } #else -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) { cpu_fprintf(f, "[TCG profiler not compiled]\n"); } @@ -47,12 +47,12 @@ typedef uint64_t TCGRegSet; #error unsupported #endif -enum { -#define DEF(s, n, copy_size) INDEX_op_ ## s, +typedef enum TCGOpcode { +#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, #include "tcg-opc.h" #undef DEF NB_OPS, -}; +} TCGOpcode; #define tcg_regset_clear(d) (d) = 0 #define tcg_regset_set(d, s) (d) = (s) @@ -96,17 +96,29 @@ typedef struct TCGPool { this value, they are statically allocated in the TB stack frame */ #define TCG_STATIC_CALL_ARGS_SIZE 128 -typedef int TCGType; - -#define TCG_TYPE_I32 0 -#define TCG_TYPE_I64 1 -#define TCG_TYPE_COUNT 2 /* number of different types */ +typedef enum TCGType { + TCG_TYPE_I32, + TCG_TYPE_I64, + TCG_TYPE_COUNT, /* number of different types */ + /* An alias for the size of the host register. */ #if TCG_TARGET_REG_BITS == 32 -#define TCG_TYPE_PTR TCG_TYPE_I32 + TCG_TYPE_REG = TCG_TYPE_I32, +#else + TCG_TYPE_REG = TCG_TYPE_I64, +#endif + + /* An alias for the size of the native pointer. We don't currently + support any hosts with 64-bit registers and 32-bit pointers. */ + TCG_TYPE_PTR = TCG_TYPE_REG, + + /* An alias for the size of the target "long", aka register. */ +#if TARGET_LONG_BITS == 64 + TCG_TYPE_TL = TCG_TYPE_I64, #else -#define TCG_TYPE_PTR TCG_TYPE_I64 + TCG_TYPE_TL = TCG_TYPE_I32, #endif +} TCGType; typedef tcg_target_ulong TCGArg; @@ -311,6 +323,10 @@ struct TCGContext { int64_t restore_count; int64_t restore_time; #endif + +#ifdef CONFIG_DEBUG_TCG + int temps_in_use; +#endif }; extern TCGContext tcg_ctx; @@ -341,6 +357,7 @@ static inline void *tcg_malloc(int size) } void tcg_context_init(TCGContext *s); +void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); @@ -379,8 +396,20 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void) void tcg_temp_free_i64(TCGv_i64 arg); char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); -void tcg_dump_info(FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); +#if defined(CONFIG_DEBUG_TCG) +/* If you call tcg_clear_temp_count() at the start of a section of + * code which is not supposed to leak any TCG temporaries, then + * calling tcg_check_temp_count() at the end of the section will + * return 1 if the section did in fact leak a temporary. 
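A sketch of the intended pattern around the translation of a single guest instruction; the two hooks are from this patch, while the translator call and DisasContext field are invented for illustration:

    tcg_clear_temp_count();
    disas_insn(env, dc);  /* hypothetical per-instruction translator */
    if (tcg_check_temp_count()) {
        fprintf(stderr, "TCG temporary leaked before " TARGET_FMT_lx "\n",
                dc->pc);
    }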
+ */ +void tcg_clear_temp_count(void); +int tcg_check_temp_count(void); +#else +#define tcg_clear_temp_count() do { } while (0) +#define tcg_check_temp_count() 0 +#endif + +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); #define TCG_CT_ALIAS 0x80 #define TCG_CT_IALIAS 0x40 @@ -409,7 +438,6 @@ typedef struct TCGOpDef { const char *name; uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; uint8_t flags; - uint16_t copy_size; TCGArgConstraint *args_ct; int *sorted_args; #if defined(CONFIG_DEBUG_TCG) @@ -418,13 +446,10 @@ } TCGOpDef; typedef struct TCGTargetOpDef { - int op; + TCGOpcode op; const char *args_ct_str[TCG_MAX_OP_ARGS]; } TCGTargetOpDef; -void tcg_target_init(TCGContext *s); -void tcg_target_qemu_prologue(TCGContext *s); - #define tcg_abort() \ do {\ fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\ @@ -472,9 +497,6 @@ TCGv_i64 tcg_const_i64(int64_t val); TCGv_i32 tcg_const_local_i32(int32_t val); TCGv_i64 tcg_const_local_i64(int64_t val); -void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, - int label_index, long addend); - extern uint8_t code_gen_prologue[]; #if defined(_ARCH_PPC) && !defined(_ARCH_PPC64) #define tcg_qemu_tb_exec(tb_ptr) \ diff --git a/translate-all.c b/translate-all.c index 0bab63f..ed174e7 100644 --- a/translate-all.c +++ b/translate-all.c @@ -72,9 +72,11 @@ unsigned long code_gen_max_block_size(void) if (max == 0) { max = TCG_MAX_OP_SIZE; -#define DEF(s, n, copy_size) max = copy_size > max? copy_size : max; +#define DEF(name, iarg, oarg, carg, flags) DEF2((iarg) + (oarg) + (carg)) +#define DEF2(copy_size) max = (copy_size > max) ? copy_size : max; #include "tcg-opc.h" #undef DEF +#undef DEF2 max *= OPC_MAX_SIZE; }
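For reference, tcg-opc.h now uses a single five-argument DEF everywhere, so translate-all.c rebuilds its old copy_size bound from operand counts: the local DEF forwards each opcode's total (iargs + oargs + cargs) to DEF2, which keeps the running maximum before it is scaled by OPC_MAX_SIZE. The same computation without the DEF2 indirection, as a sketch (same TCG_MAX_OP_SIZE/OPC_MAX_SIZE bounds assumed):

    /* The largest total operand count over all opcodes, scaled by the
       worst-case bytes emitted per operand. */
    max = TCG_MAX_OP_SIZE;
    #define DEF(name, oargs, iargs, cargs, flags)   \
        if ((oargs) + (iargs) + (cargs) > max)      \
            max = (oargs) + (iargs) + (cargs);
    #include "tcg-opc.h"
    #undef DEF
    max *= OPC_MAX_SIZE;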