diff options
author | David 'Digit' Turner <digit@google.com> | 2009-10-05 14:06:05 -0700 |
---|---|---|
committer | David 'Digit' Turner <digit@google.com> | 2009-10-05 14:06:05 -0700 |
commit | ddf49e53df97a349f42c733059165dc73c9907dc (patch) | |
tree | 94f840eab7a00cb543b37e1ab9086bc26df5245e | |
parent | acbee3546b9a380a4eb33daef3dccfac87c56b0b (diff) | |
download | external_qemu-ddf49e53df97a349f42c733059165dc73c9907dc.zip external_qemu-ddf49e53df97a349f42c733059165dc73c9907dc.tar.gz external_qemu-ddf49e53df97a349f42c733059165dc73c9907dc.tar.bz2 |
Workaround ARMv7 emulation issues.
This patch is used to disable the code generator's liveness analysis pass when we
emulate an ARMv7 CPU. This is required to properly run the dex preoptimization pass
during the build of -user system images.
Also includes:
- a fix for a sad typo in target-arm/translate.c related to NEON instruction emulation
- upstream improvements to the x86 and x86_64 backends to generate better goto branches at runtime
- upstream fixes for 64-bit swap and shift operations in TCG
After this patch is applied, re-enabling the ARMv7 memcpy should allow running the dex preopt
pass successfully. Anything else is untested though. WE STILL NEED TO FIX THE CODE GENERATOR.
-rw-r--r-- | android/main.c | 10 | ||||
-rw-r--r-- | target-arm/translate.c | 2 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c | 30 | ||||
-rw-r--r-- | tcg/tcg-op.h | 11 | ||||
-rw-r--r-- | tcg/tcg.c | 12 | ||||
-rw-r--r-- | tcg/tcg.h | 4 | ||||
-rw-r--r-- | tcg/x86_64/tcg-target.c | 48 |
7 files changed, 86 insertions, 31 deletions
diff --git a/android/main.c b/android/main.c index 682cfce..686dac8 100644 --- a/android/main.c +++ b/android/main.c @@ -67,6 +67,8 @@ #include "android/globals.h" #include "tcpdump.h" +#include "tcg.h" + /* in vl.c */ extern void qemu_help(int code); @@ -2356,6 +2358,14 @@ int main(int argc, char **argv) args[n++] = "-cpu"; args[n++] = "cortex-a8"; } + /* we also disable liveness analysis in the code generator, because it seems + * that ARMv7 -> x86 code generation triggers a fatal assertion when it is + * activated. The drawback is that the generated code is slower, but at the + * moment, ARMv7 emulation is only used to run the dex preopt pass within the + * Android build system. This hack should be removed when we fix the code + * generator. + */ + tcg_disable_liveness_analysis = 1; } args[n++] = "-initrd"; diff --git a/target-arm/translate.c b/target-arm/translate.c index ad3ab44..15239d1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3828,7 +3828,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_dup_low16(tmp); break; case 2: - tmp = gen_ld32(cpu_T[0], IS_USER(s)); + tmp = gen_ld32(cpu_T[1], IS_USER(s)); break; case 3: return 1; diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index e0fd434..e748ba2 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -276,11 +276,23 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2); } -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val) +static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf) { - if (val == (int8_t)val) { + if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) { + /* inc */ + tcg_out_opc(s, 0x40 + r0); + } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) { + /* dec */ + tcg_out_opc(s, 0x48 + r0); + } else if (val == (int8_t)val) { tcg_out_modrm(s, 0x83, c, 
r0); tcg_out8(s, val); + } else if (c == ARITH_AND && val == 0xffu && r0 < 4) { + /* movzbl */ + tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0); + } else if (c == ARITH_AND && val == 0xffffu) { + /* movzwl */ + tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0); } else { tcg_out_modrm(s, 0x81, c, r0); tcg_out32(s, val); @@ -290,7 +302,7 @@ static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val) static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) { if (val != 0) - tgen_arithi(s, ARITH_ADD, reg, val); + tgen_arithi(s, ARITH_ADD, reg, val, 0); } static void tcg_out_jxx(TCGContext *s, int opc, int label_index) @@ -338,7 +350,7 @@ static void tcg_out_brcond(TCGContext *s, int cond, /* test r, r */ tcg_out_modrm(s, 0x85, arg1, arg1); } else { - tgen_arithi(s, ARITH_CMP, arg1, arg2); + tgen_arithi(s, ARITH_CMP, arg1, arg2, 0); } } else { tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1); @@ -951,7 +963,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, c = ARITH_ADD; gen_arith: if (const_args[2]) { - tgen_arithi(s, c, args[0], args[2]); + tgen_arithi(s, c, args[0], args[2], 0); } else { tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]); } @@ -1009,21 +1021,21 @@ static inline void tcg_out_op(TCGContext *s, int opc, case INDEX_op_add2_i32: if (const_args[4]) - tgen_arithi(s, ARITH_ADD, args[0], args[4]); + tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); else tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]); if (const_args[5]) - tgen_arithi(s, ARITH_ADC, args[1], args[5]); + tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); else tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]); break; case INDEX_op_sub2_i32: if (const_args[4]) - tgen_arithi(s, ARITH_SUB, args[0], args[4]); + tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); else tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]); if (const_args[5]) - tgen_arithi(s, ARITH_SBB, args[1], args[5]); + tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); else tcg_out_modrm(s, 
0x01 | (ARITH_SBB << 3), args[5], args[1]); break; diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index daeb025..7cb6934 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -1441,9 +1441,8 @@ static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) #ifdef TCG_TARGET_HAS_bswap64_i64 tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); #else - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); tcg_gen_shli_i64(t0, arg, 56); @@ -1473,8 +1472,8 @@ static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_shri_i64(t1, arg, 56); tcg_gen_or_i64(ret, t0, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); #endif } @@ -1749,7 +1748,7 @@ static inline void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - tcg_gen_shl_i64(t0, arg1, arg2); + tcg_gen_shr_i64(t0, arg1, arg2); tcg_gen_subfi_i64(t1, 64, arg2); tcg_gen_shl_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); @@ -57,6 +57,7 @@ #include "tcg-op.h" #include "elf.h" +int tcg_disable_liveness_analysis; static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend); @@ -1077,7 +1078,16 @@ static void tcg_liveness_analysis(TCGContext *s) const TCGOpDef *def; uint8_t *dead_temps; unsigned int dead_iargs; - + + if (tcg_disable_liveness_analysis) { + int nb_ops; + nb_ops = gen_opc_ptr - gen_opc_buf + 1; + + s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t)); + return; + } + gen_opc_ptr++; /* skip end */ nb_ops = gen_opc_ptr - gen_opc_buf; @@ -471,3 +471,7 @@ extern uint8_t code_gen_prologue[]; #else #define tcg_qemu_tb_exec(tb_ptr) ((long REGPARM (*)(void *))code_gen_prologue)(tb_ptr) #endif + +/* set to 1 to disable LIVENESS ANALYSIS - temporary work-around for + * specific fatal assertion error in ARMv7 -> x86 
code translation. */ +extern int tcg_disable_liveness_analysis; diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c index 5378e85..a26e714 100644 --- a/tcg/x86_64/tcg-target.c +++ b/tcg/x86_64/tcg-target.c @@ -363,6 +363,20 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_goto(TCGContext *s, int call, uint8_t *target) +{ + int32_t disp; + + disp = target - s->code_ptr - 5; + if (disp == (target - s->code_ptr - 5)) { + tcg_out8(s, call ? 0xe8 : 0xe9); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (tcg_target_long) target); + tcg_out_modrm(s, 0xff, call ? 2 : 4, TCG_REG_R10); + } +} + static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1, tcg_target_long arg2) { @@ -383,7 +397,13 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, static inline void tgen_arithi32(TCGContext *s, int c, int r0, int32_t val) { - if (val == (int8_t)val) { + if ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1)) { + /* inc */ + tcg_out_modrm(s, 0xff, 0, r0); + } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) { + /* dec */ + tcg_out_modrm(s, 0xff, 1, r0); + } else if (val == (int8_t)val) { tcg_out_modrm(s, 0x83, c, r0); tcg_out8(s, val); } else if (c == ARITH_AND && val == 0xffu) { @@ -400,7 +420,13 @@ static inline void tgen_arithi32(TCGContext *s, int c, int r0, int32_t val) static inline void tgen_arithi64(TCGContext *s, int c, int r0, int64_t val) { - if (val == (int8_t)val) { + if ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1)) { + /* inc */ + tcg_out_modrm(s, 0xff | P_REXW, 0, r0); + } else if ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1)) { + /* dec */ + tcg_out_modrm(s, 0xff | P_REXW, 1, r0); + } else if (val == (int8_t)val) { tcg_out_modrm(s, 0x83 | P_REXW, c, r0); tcg_out8(s, val); } else if (c == ARITH_AND && val == 0xffu) { @@ -508,6 +534,7 @@ static void tcg_out_qemu_ld(TCGContext *s, 
const TCGArg *args, int opc) { int addr_reg, data_reg, r0, r1, mem_index, s_bits, bswap, rexw; + int32_t offset; #if defined(CONFIG_SOFTMMU) uint8_t *label1_ptr, *label2_ptr; #endif @@ -558,9 +585,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, /* XXX: move that code at the end of the TB */ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RSI, mem_index); - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_ld_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); + tcg_out_goto(s, 1, qemu_ld_helpers[s_bits]); switch(opc) { case 0 | 4: @@ -760,9 +785,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, break; } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index); - tcg_out8(s, 0xe8); - tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - - (tcg_target_long)s->code_ptr - 4); + tcg_out_goto(s, 1, qemu_st_helpers[s_bits]); /* jmp label2 */ tcg_out8(s, 0xeb); @@ -839,8 +862,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, switch(opc) { case INDEX_op_exit_tb: tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, args[0]); - tcg_out8(s, 0xe9); /* jmp tb_ret_addr */ - tcg_out32(s, tb_ret_addr - s->code_ptr - 4); + tcg_out_goto(s, 0, tb_ret_addr); break; case INDEX_op_goto_tb: if (s->tb_jmp_offset) { @@ -859,16 +881,14 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args, break; case INDEX_op_call: if (const_args[0]) { - tcg_out8(s, 0xe8); - tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4); + tcg_out_goto(s, 1, (void *) args[0]); } else { tcg_out_modrm(s, 0xff, 2, args[0]); } break; case INDEX_op_jmp: if (const_args[0]) { - tcg_out8(s, 0xe9); - tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4); + tcg_out_goto(s, 0, (void *) args[0]); } else { tcg_out_modrm(s, 0xff, 4, args[0]); } |