Diffstat (limited to 'target-arm')
-rw-r--r-- | target-arm/cpu.h | 149
-rw-r--r-- | target-arm/exec.h | 12
-rw-r--r-- | target-arm/helper.c | 1202
-rw-r--r-- | target-arm/helper.h (renamed from target-arm/helpers.h) | 315
-rw-r--r-- | target-arm/iwmmxt_helper.c | 85
-rw-r--r-- | target-arm/machine.c | 40
-rw-r--r-- | target-arm/neon_helper.c | 907
-rw-r--r-- | target-arm/op_addsub.h | 8
-rw-r--r-- | target-arm/op_helper.c | 314
-rw-r--r-- | target-arm/translate.c | 5102
10 files changed, 5106 insertions(+), 3028 deletions(-)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f16e391..b842c52 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -43,6 +43,7 @@
 #define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */
 #define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */
 #define EXCP_STREX 10
+#define EXCP_SMC 11 /* secure monitor call */
 #define ARMV7M_EXCP_RESET 1
 #define ARMV7M_EXCP_NMI 2
@@ -55,10 +56,16 @@
 #define ARMV7M_EXCP_PENDSV 14
 #define ARMV7M_EXCP_SYSTICK 15
+/* ARM-specific interrupt pending bits. */
+#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1
+
+
 typedef void ARMWriteCPFunc(void *opaque, int cp_info,
-                            int srcreg, int operand, uint32_t value);
+                            int srcreg, int operand, uint32_t value,
+                            void *retaddr);
 typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
-                               int dstreg, int operand);
+                               int dstreg, int operand,
+                               void *retaddr);
 struct arm_boot_info;
@@ -82,9 +89,9 @@ typedef struct CPUARMState {
     uint32_t spsr;
     /* Banked registers. */
-    uint32_t banked_spsr[6];
-    uint32_t banked_r13[6];
-    uint32_t banked_r14[6];
+    uint32_t banked_spsr[7];
+    uint32_t banked_r13[7];
+    uint32_t banked_r14[7];
     /* These hold r8-r12. */
     uint32_t usr_regs[5];
@@ -112,6 +119,9 @@ typedef struct CPUARMState {
     uint32_t c1_sys; /* System control register. */
     uint32_t c1_coproc; /* Coprocessor access register. */
     uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */
+    uint32_t c1_secfg; /* Secure configuration register. */
+    uint32_t c1_sedbg; /* Secure debug enable register. */
+    uint32_t c1_nseac; /* Non-secure access control register. */
     uint32_t c2_base0; /* MMU translation table base 0. */
     uint32_t c2_base1; /* MMU translation table base 1. */
     uint32_t c2_control; /* MMU translation table base control. */
@@ -126,8 +136,14 @@ typedef struct CPUARMState {
     uint32_t c6_region[8]; /* MPU base/size registers. */
     uint32_t c6_insn; /* Fault address registers. */
     uint32_t c6_data;
+    uint32_t c7_par; /* Translation result. */
     uint32_t c9_insn; /* Cache lockdown registers. */
     uint32_t c9_data;
+    uint32_t c9_pmcr_data; /* Performance Monitor Control Register */
+    uint32_t c9_useren; /* user enable register */
+    uint32_t c9_inten; /* interrupt enable set/clear register */
+    uint32_t c12_vbar; /* secure/nonsecure vector base address register. */
+    uint32_t c12_mvbar; /* monitor vector base address register. */
     uint32_t c13_fcse; /* FCSE PID. */
     uint32_t c13_context; /* Context ID. */
     uint32_t c13_tls1; /* User RW Thread register. */
@@ -148,15 +164,10 @@ typedef struct CPUARMState {
         int current_sp;
         int exception;
         int pending_exception;
-        void *nvic;
     } v7m;
-    /* Coprocessor IO used by peripherals */
-    struct {
-        ARMReadCPFunc *cp_read;
-        ARMWriteCPFunc *cp_write;
-        void *opaque;
-    } cp[15];
+    /* Minimal set of debug coprocessor state (cp14) */
+    uint32_t cp14_dbgdidr;
     /* Thumb-2 EE state. */
     uint32_t teecr;
@@ -165,10 +176,6 @@ typedef struct CPUARMState {
     /* Internal CPU feature flags. */
     uint32_t features;
-    /* Callback for vectored interrupt controller. */
-    int (*get_irq_vector)(struct CPUARMState *);
-    void *irq_opaque;
-
     /* VFP coprocessor state. */
     struct {
         float64 regs[32];
@@ -181,12 +188,27 @@ typedef struct CPUARMState {
         /* scratch space when Tn are not sufficient. */
         uint32_t scratch[8];
+        /* fp_status is the "normal" fp status. standard_fp_status retains
+         * values corresponding to the ARM "Standard FPSCR Value", ie
+         * default-NaN, flush-to-zero, round-to-nearest and is used by
+         * any operations (generally Neon) which the architecture defines
+         * as controlled by the standard FPSCR value rather than the FPSCR.
+         *
+         * To avoid having to transfer exception bits around, we simply
+         * say that the FPSCR cumulative exception flags are the logical
+         * OR of the flags in the two fp statuses. This relies on the
+         * only thing which needs to read the exception flags being
+         * an explicit FPSCR read.
+         */
         float_status fp_status;
+        float_status standard_fp_status;
     } vfp;
+    uint32_t exclusive_addr;
+    uint32_t exclusive_val;
+    uint32_t exclusive_high;
 #if defined(CONFIG_USER_ONLY)
-    struct mmon_state *mmon_entry;
-#else
-    uint32_t mmon_addr;
+    uint32_t exclusive_test;
+    uint32_t exclusive_info;
 #endif
     /* iwMMXt coprocessor state. */
@@ -205,6 +227,14 @@ typedef struct CPUARMState {
     CPU_COMMON
     /* These fields after the common ones so they are preserved on reset. */
+
+    /* Coprocessor IO used by peripherals */
+    struct {
+        ARMReadCPFunc *cp_read;
+        ARMWriteCPFunc *cp_write;
+        void *opaque;
+    } cp[15];
+    void *nvic;
     struct arm_boot_info *boot_info;
 } CPUARMState;
@@ -223,9 +253,8 @@
 int cpu_arm_signal_handler(int host_signum, void *pinfo, void *puc);
 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
                               int mmu_idx, int is_softmmu);
+#define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
-void cpu_lock(void);
-void cpu_unlock(void);
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
     env->cp15.c13_tls2 = newtls;
@@ -299,11 +328,16 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
     }
 }
+/* Return the current FPSCR value. */
+uint32_t vfp_get_fpscr(CPUARMState *env);
+void vfp_set_fpscr(CPUARMState *env, uint32_t val);
+
 enum arm_cpu_mode {
     ARM_CPU_MODE_USR = 0x10,
     ARM_CPU_MODE_FIQ = 0x11,
     ARM_CPU_MODE_IRQ = 0x12,
     ARM_CPU_MODE_SVC = 0x13,
+    ARM_CPU_MODE_SMC = 0x16,
     ARM_CPU_MODE_ABT = 0x17,
     ARM_CPU_MODE_UND = 0x1b,
     ARM_CPU_MODE_SYS = 0x1f
@@ -339,11 +373,17 @@ enum arm_features {
     ARM_FEATURE_THUMB2,
     ARM_FEATURE_MPU, /* Only has Memory Protection Unit, not full MMU. */
     ARM_FEATURE_VFP3,
+    ARM_FEATURE_VFP_FP16,
     ARM_FEATURE_NEON,
     ARM_FEATURE_DIV,
     ARM_FEATURE_M, /* Microcontroller profile. */
     ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */
-    ARM_FEATURE_THUMB2EE
+    ARM_FEATURE_THUMB2EE,
+    ARM_FEATURE_V7MP, /* v7 Multiprocessing Extensions */
+    ARM_FEATURE_V4T,
+    ARM_FEATURE_V5,
+    ARM_FEATURE_STRONGARM,
+    ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
 };
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -351,7 +391,7 @@ static inline int arm_feature(CPUARMState *env, int feature)
     return (env->features & (1u << feature)) != 0;
 }
-void arm_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 /* Interface between CPU and Interrupt controller. */
 void armv7m_nvic_set_pending(void *opaque, int irq);
@@ -374,6 +414,8 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
 #define ARM_CPUID_ARM946 0x41059461
 #define ARM_CPUID_TI915T 0x54029152
 #define ARM_CPUID_TI925T 0x54029252
+#define ARM_CPUID_SA1100 0x4401A11B
+#define ARM_CPUID_SA1110 0x6901B119
 #define ARM_CPUID_PXA250 0x69052100
 #define ARM_CPUID_PXA255 0x69052d00
 #define ARM_CPUID_PXA260 0x69052903
@@ -390,6 +432,7 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
 #define ARM_CPUID_ARM1136_R2 0x4107b362
 #define ARM_CPUID_ARM11MPCORE 0x410fb022
 #define ARM_CPUID_CORTEXA8 0x410fc080
+#define ARM_CPUID_CORTEXA8_R2 0x412fc083
 #define ARM_CPUID_CORTEXA9 0x410fc090
 #define ARM_CPUID_CORTEXM3 0x410fc231
 #define ARM_CPUID_ANY 0xffffffff
@@ -403,13 +446,16 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
 #define TARGET_PAGE_BITS 10
 #endif
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+
 #define cpu_init cpu_arm_init
 #define cpu_exec cpu_arm_exec
 #define cpu_gen_code cpu_arm_gen_code
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
-#define CPU_SAVE_VERSION 1
+#define CPU_SAVE_VERSION 3
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
@@ -439,24 +485,57 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
 #endif
 #include "cpu-all.h"
-#include "exec-all.h"
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
-    env->regs[15] = tb->pc;
-}
+/* Bit usage in the TB flags field: */
+#define ARM_TBFLAG_THUMB_SHIFT 0
+#define ARM_TBFLAG_THUMB_MASK (1 << ARM_TBFLAG_THUMB_SHIFT)
+#define ARM_TBFLAG_VECLEN_SHIFT 1
+#define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
+#define ARM_TBFLAG_VECSTRIDE_SHIFT 4
+#define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
+#define ARM_TBFLAG_PRIV_SHIFT 6
+#define ARM_TBFLAG_PRIV_MASK (1 << ARM_TBFLAG_PRIV_SHIFT)
+#define ARM_TBFLAG_VFPEN_SHIFT 7
+#define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT)
+#define ARM_TBFLAG_CONDEXEC_SHIFT 8
+#define ARM_TBFLAG_CONDEXEC_MASK (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
+/* Bits 31..16 are currently unused. */
+
+/* some convenience accessor macros */
+#define ARM_TBFLAG_THUMB(F) \
+    (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
+#define ARM_TBFLAG_VECLEN(F) \
+    (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT)
+#define ARM_TBFLAG_VECSTRIDE(F) \
+    (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT)
+#define ARM_TBFLAG_PRIV(F) \
+    (((F) & ARM_TBFLAG_PRIV_MASK) >> ARM_TBFLAG_PRIV_SHIFT)
+#define ARM_TBFLAG_VFPEN(F) \
+    (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT)
+#define ARM_TBFLAG_CONDEXEC(F) \
+    (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
+    int privmode;
     *pc = env->regs[15];
     *cs_base = 0;
-    *flags = env->thumb | (env->vfp.vec_len << 1)
-            | (env->vfp.vec_stride << 4) | (env->condexec_bits << 8);
-    if ((env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR)
-        *flags |= (1 << 6);
-    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30))
-        *flags |= (1 << 7);
+    *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
+        | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
+        | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
+        | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT);
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+    } else {
+        privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
+    }
+    if (privmode) {
+        *flags |= ARM_TBFLAG_PRIV_MASK;
+    }
+    if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+        *flags |= ARM_TBFLAG_VFPEN_MASK;
+    }
 }
 #endif
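Note on the cpu.h hunk above: it replaces the previous magic bit numbers in the TB flags word with named shift/mask pairs. A minimal standalone sketch (values illustrative, not taken from the commit) of how the encode side in cpu_get_tb_cpu_state() and the ARM_TBFLAG_*() accessors are meant to round-trip:

    /* Standalone sketch of the TB-flags pack/unpack scheme above. */
    #include <assert.h>
    #include <stdint.h>

    #define ARM_TBFLAG_THUMB_SHIFT    0
    #define ARM_TBFLAG_THUMB_MASK     (1 << ARM_TBFLAG_THUMB_SHIFT)
    #define ARM_TBFLAG_CONDEXEC_SHIFT 8
    #define ARM_TBFLAG_CONDEXEC_MASK  (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
    #define ARM_TBFLAG_THUMB(F) \
        (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
    #define ARM_TBFLAG_CONDEXEC(F) \
        (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)

    int main(void)
    {
        uint32_t thumb = 1, condexec = 0x2f; /* illustrative values */
        uint32_t flags = (thumb << ARM_TBFLAG_THUMB_SHIFT)
                       | (condexec << ARM_TBFLAG_CONDEXEC_SHIFT);
        /* The accessors recover exactly what was packed. */
        assert(ARM_TBFLAG_THUMB(flags) == thumb);
        assert(ARM_TBFLAG_CONDEXEC(flags) == condexec);
        return 0;
    }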
diff --git a/target-arm/exec.h b/target-arm/exec.h
index 710a2f9..07bfd57 100644
--- a/target-arm/exec.h
+++ b/target-arm/exec.h
@@ -14,17 +14,12 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "config.h"
 #include "dyngen-exec.h"
 register struct CPUARMState *env asm(AREG0);
-register uint32_t T0 asm(AREG1);
-register uint32_t T1 asm(AREG2);
-
-#define M0 env->iwmmxt.val
 #include "cpu.h"
 #include "exec-all.h"
@@ -61,3 +56,8 @@ static inline int cpu_halted(CPUState *env) {
 #endif
 void raise_exception(int);
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+    env->regs[15] = tb->pc;
+}
+
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 154aa46..3731029 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,11 +5,21 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "gdbstub.h"
-#include "helpers.h"
+#include "helper.h"
 #include "qemu-common.h"
+#include "host-utils.h"
+#if !defined(CONFIG_USER_ONLY)
+//#include "hw/loader.h"
 #ifdef CONFIG_TRACE
 #include "android-trace.h"
 #endif
+#endif
+
+static uint32_t cortexa9_cp15_c0_c1[8] =
+{ 0x1031, 0x11, 0x000, 0, 0x00100103, 0x20000000, 0x01230000, 0x00002111 };
+
+static uint32_t cortexa9_cp15_c0_c2[8] =
+{ 0x00101111, 0x13112111, 0x21232041, 0x11112131, 0x00111142, 0, 0, 0 };
 static uint32_t cortexa8_cp15_c0_c1[8] =
 { 0x1031, 0x11, 0x400, 0, 0x31100003, 0x20000000, 0x01202000, 0x11 };
@@ -17,6 +27,9 @@
 static uint32_t cortexa8_cp15_c0_c2[8] =
 { 0x00101111, 0x12112111, 0x21232031, 0x11112131, 0x00111142, 0, 0, 0 };
+static uint32_t cortexa8r2_cp15_c0_c2[8] =
+{ 0x00101111, 0x12112111, 0x21232031, 0x11112131, 0x00011142, 0, 0, 0 };
+
 static uint32_t mpcore_cp15_c0_c1[8] =
 { 0x111, 0x1, 0, 0x2, 0x01100103, 0x10020302, 0x01222000, 0 };
@@ -41,17 +54,23 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
     env->cp15.c0_cpuid = id;
     switch (id) {
     case ARM_CPUID_ARM926:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_VFP);
         env->vfp.xregs[ARM_VFP_FPSID] = 0x41011090;
         env->cp15.c0_cachetype = 0x1dd20d2;
         env->cp15.c1_sys = 0x00090078;
         break;
     case ARM_CPUID_ARM946:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_MPU);
         env->cp15.c0_cachetype = 0x0f004006;
         env->cp15.c1_sys = 0x00000078;
         break;
     case ARM_CPUID_ARM1026:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410110a0;
@@ -60,6 +79,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         break;
     case ARM_CPUID_ARM1136_R2:
     case ARM_CPUID_ARM1136:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_AUXCR);
@@ -69,8 +90,11 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         memcpy(env->cp15.c0_c1, arm1136_cp15_c0_c1, 8 * sizeof(uint32_t));
         memcpy(env->cp15.c0_c2, arm1136_cp15_c0_c2, 8 * sizeof(uint32_t));
         env->cp15.c0_cachetype = 0x1dd20d2;
+        env->cp15.c1_sys = 0x00050078;
         break;
     case ARM_CPUID_ARM11MPCORE:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_V6K);
         set_feature(env, ARM_FEATURE_VFP);
@@ -83,6 +107,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         env->cp15.c0_cachetype = 0x1dd20d2;
         break;
     case ARM_CPUID_CORTEXA8:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_V6K);
         set_feature(env, ARM_FEATURE_V7);
@@ -92,6 +118,7 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         set_feature(env, ARM_FEATURE_VFP3);
         set_feature(env, ARM_FEATURE_NEON);
         set_feature(env, ARM_FEATURE_THUMB2EE);
+        set_feature(env, ARM_FEATURE_TRUSTZONE);
         env->vfp.xregs[ARM_VFP_FPSID] = 0x410330c0;
         env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
         env->vfp.xregs[ARM_VFP_MVFR1] = 0x00011100;
@@ -102,8 +129,66 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         env->cp15.c0_ccsid[0] = 0xe007e01a; /* 16k L1 dcache. */
         env->cp15.c0_ccsid[1] = 0x2007e01a; /* 16k L1 icache. */
         env->cp15.c0_ccsid[2] = 0xf0000000; /* No L2 icache. */
+        env->cp15.c1_sys = 0x00c50078;
+        break;
+    case ARM_CPUID_CORTEXA8_R2:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
+        set_feature(env, ARM_FEATURE_V6);
+        set_feature(env, ARM_FEATURE_V6K);
+        set_feature(env, ARM_FEATURE_V7);
+        set_feature(env, ARM_FEATURE_AUXCR);
+        set_feature(env, ARM_FEATURE_THUMB2);
+        set_feature(env, ARM_FEATURE_VFP);
+        set_feature(env, ARM_FEATURE_VFP3);
+        set_feature(env, ARM_FEATURE_NEON);
+        set_feature(env, ARM_FEATURE_THUMB2EE);
+        set_feature(env, ARM_FEATURE_TRUSTZONE);
+        env->vfp.xregs[ARM_VFP_FPSID] = 0x410330c2;
+        env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
+        env->vfp.xregs[ARM_VFP_MVFR1] = 0x00011111;
+        memcpy(env->cp15.c0_c1, cortexa8_cp15_c0_c1, 8 * sizeof(uint32_t));
+        memcpy(env->cp15.c0_c2, cortexa8r2_cp15_c0_c2, 8 * sizeof(uint32_t));
+        env->cp15.c0_cachetype = 0x82048004;
+        env->cp15.c0_clid = (1 << 27) | (2 << 24) | (4 << 3) | 3;
+        env->cp15.c0_ccsid[0] = 0xe007e01a; /* 16k L1 dcache. */
+        env->cp15.c0_ccsid[1] = 0x2007e01a; /* 16k L1 icache. */
+        env->cp15.c0_ccsid[2] = 0xf03fe03a; /* 256k L2 cache. */
+        env->cp15.c1_sys = 0x00c50078;
+        break;
+    case ARM_CPUID_CORTEXA9:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
+        set_feature(env, ARM_FEATURE_V6);
+        set_feature(env, ARM_FEATURE_V6K);
+        set_feature(env, ARM_FEATURE_V7);
+        set_feature(env, ARM_FEATURE_AUXCR);
+        set_feature(env, ARM_FEATURE_THUMB2);
+        set_feature(env, ARM_FEATURE_VFP);
+        set_feature(env, ARM_FEATURE_VFP3);
+        set_feature(env, ARM_FEATURE_VFP_FP16);
+        set_feature(env, ARM_FEATURE_NEON);
+        set_feature(env, ARM_FEATURE_THUMB2EE);
+        /* Note that A9 supports the MP extensions even for
+         * A9UP and single-core A9MP (which are both different
+         * and valid configurations; we don't model A9UP).
+         */
+        set_feature(env, ARM_FEATURE_V7MP);
+        set_feature(env, ARM_FEATURE_TRUSTZONE);
+        env->vfp.xregs[ARM_VFP_FPSID] = 0x41034000; /* Guess */
+        env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
+        env->vfp.xregs[ARM_VFP_MVFR1] = 0x01111111;
+        memcpy(env->cp15.c0_c1, cortexa9_cp15_c0_c1, 8 * sizeof(uint32_t));
+        memcpy(env->cp15.c0_c2, cortexa9_cp15_c0_c2, 8 * sizeof(uint32_t));
+        env->cp15.c0_cachetype = 0x80038003;
+        env->cp15.c0_clid = (1 << 27) | (1 << 24) | 3;
+        env->cp15.c0_ccsid[0] = 0xe00fe015; /* 16k L1 dcache. */
+        env->cp15.c0_ccsid[1] = 0x200fe015; /* 16k L1 icache. */
+        env->cp15.c1_sys = 0x00c50078;
         break;
     case ARM_CPUID_CORTEXM3:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_THUMB2);
         set_feature(env, ARM_FEATURE_V7);
@@ -111,18 +196,23 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         set_feature(env, ARM_FEATURE_DIV);
         break;
     case ARM_CPUID_ANY: /* For userspace emulation. */
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_V6);
         set_feature(env, ARM_FEATURE_V6K);
         set_feature(env, ARM_FEATURE_V7);
         set_feature(env, ARM_FEATURE_THUMB2);
         set_feature(env, ARM_FEATURE_VFP);
         set_feature(env, ARM_FEATURE_VFP3);
+        set_feature(env, ARM_FEATURE_VFP_FP16);
         set_feature(env, ARM_FEATURE_NEON);
         set_feature(env, ARM_FEATURE_THUMB2EE);
         set_feature(env, ARM_FEATURE_DIV);
+        set_feature(env, ARM_FEATURE_V7MP);
         break;
     case ARM_CPUID_TI915T:
     case ARM_CPUID_TI925T:
+        set_feature(env, ARM_FEATURE_V4T);
         set_feature(env, ARM_FEATURE_OMAPCP);
         env->cp15.c0_cpuid = ARM_CPUID_TI925T; /* Depends on wiring. */
         env->cp15.c0_cachetype = 0x5109149;
@@ -135,6 +225,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
     case ARM_CPUID_PXA260:
     case ARM_CPUID_PXA261:
     case ARM_CPUID_PXA262:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_XSCALE);
         /* JTAG_ID is ((id << 28) | 0x09265013) */
         env->cp15.c0_cachetype = 0xd172172;
@@ -146,6 +238,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
     case ARM_CPUID_PXA270_B1:
     case ARM_CPUID_PXA270_C0:
     case ARM_CPUID_PXA270_C5:
+        set_feature(env, ARM_FEATURE_V4T);
+        set_feature(env, ARM_FEATURE_V5);
         set_feature(env, ARM_FEATURE_XSCALE);
         /* JTAG_ID is ((id << 28) | 0x09265013) */
         set_feature(env, ARM_FEATURE_IWMMXT);
@@ -153,6 +247,11 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
         env->cp15.c0_cachetype = 0xd172172;
         env->cp15.c1_sys = 0x00000078;
         break;
+    case ARM_CPUID_SA1100:
+    case ARM_CPUID_SA1110:
+        set_feature(env, ARM_FEATURE_STRONGARM);
+        env->cp15.c1_sys = 0x00000070;
+        break;
     default:
         cpu_abort(env, "Bad CPU ID: %x\n", id);
         break;
@@ -172,20 +271,52 @@ void cpu_reset(CPUARMState *env)
     memset(env, 0, offsetof(CPUARMState, breakpoints));
     if (id)
         cpu_reset_model_id(env, id);
+    /* DBGDIDR : we implement nothing, and just mirror the main ID
+     * register's Variant and Revision fields.
+     */
+    env->cp14_dbgdidr = (id >> 16 & 0xf0) | 0xf;
 #if defined (CONFIG_USER_ONLY)
     env->uncached_cpsr = ARM_CPU_MODE_USR;
+    /* For user mode we must enable access to coprocessors */
     env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30;
+    if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
+        env->cp15.c15_cpar = 3;
+    } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+        env->cp15.c15_cpar = 1;
+    }
 #else
     /* SVC mode with interrupts disabled. */
     env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
     /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
-       clear at reset. */
-    if (IS_M(env))
+       clear at reset. Initial SP and PC are loaded from ROM. */
+    if (IS_M(env)) {
+        uint32_t pc;
+        uint8_t *rom;
         env->uncached_cpsr &= ~CPSR_I;
+#ifndef CONFIG_ANDROID /* No hw/loader.h and no ROM support for now on Android */
+        rom = rom_ptr(0);
+        if (rom) {
+            /* We should really use ldl_phys here, in case the guest
+               modified flash and reset itself. However images
+               loaded via -kernel have not been copied yet, so load the
+               values directly from there. */
+            env->regs[13] = ldl_p(rom);
+            pc = ldl_p(rom + 4);
+            env->thumb = pc & 1;
+            env->regs[15] = pc & ~1;
+        }
+#endif
+    }
     env->vfp.xregs[ARM_VFP_FPEXC] = 0;
     env->cp15.c2_base_mask = 0xffffc000u;
 #endif
-    env->regs[15] = 0;
+    set_flush_to_zero(1, &env->vfp.standard_fp_status);
+    set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
+    set_default_nan_mode(1, &env->vfp.standard_fp_status);
+    set_float_detect_tininess(float_tininess_before_rounding,
+                              &env->vfp.fp_status);
+    set_float_detect_tininess(float_tininess_before_rounding,
+                              &env->vfp.standard_fp_status);
     tlb_flush(env, 1);
 }
@@ -236,7 +367,7 @@ static int vfp_gdb_set_reg(CPUState *env, uint8_t *buf, int reg)
     switch (reg - nregs) {
     case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
     case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
-    case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf); return 4;
+    case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
     }
     return 0;
 }
@@ -288,8 +419,12 @@ static const struct arm_cpu_t arm_cpu_names[] = {
     { ARM_CPUID_ARM11MPCORE, "arm11mpcore"},
     { ARM_CPUID_CORTEXM3, "cortex-m3"},
     { ARM_CPUID_CORTEXA8, "cortex-a8"},
+    { ARM_CPUID_CORTEXA8_R2, "cortex-a8-r2"},
+    { ARM_CPUID_CORTEXA9, "cortex-a9"},
     { ARM_CPUID_TI925T, "ti925t" },
     { ARM_CPUID_PXA250, "pxa250" },
+    { ARM_CPUID_SA1100,    "sa1100" },
+    { ARM_CPUID_SA1110,    "sa1110" },
     { ARM_CPUID_PXA255, "pxa255" },
     { ARM_CPUID_PXA260, "pxa260" },
     { ARM_CPUID_PXA261, "pxa261" },
@@ -305,7 +440,7 @@ static const struct arm_cpu_t arm_cpu_names[] = {
     { 0, NULL}
 };
-void arm_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
     int i;
@@ -397,16 +532,15 @@ uint32_t HELPER(uxtb16)(uint32_t x)
 uint32_t HELPER(clz)(uint32_t x)
 {
-    int count;
-    for (count = 32; x; count--)
-        x >>= 1;
-    return count;
+    return clz32(x);
 }
 int32_t HELPER(sdiv)(int32_t num, int32_t den)
 {
     if (den == 0)
         return 0;
+    if (num == INT_MIN && den == -1)
+        return INT_MIN;
     return num / den;
 }
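Note on the sdiv hunk above: ARM's SDIV is defined to return INT_MIN for the INT_MIN/-1 overflow case (and 0 for division by zero), whereas in C that division is undefined behaviour; similarly clz32(0) must yield 32. A small standalone check of both edge cases, assuming only standard C:

    /* Standalone check of the two edge cases handled above. */
    #include <assert.h>
    #include <limits.h>
    #include <stdint.h>

    static int32_t arm_sdiv(int32_t num, int32_t den)
    {
        if (den == 0)
            return 0;             /* ARM divide-by-zero result */
        if (num == INT_MIN && den == -1)
            return INT_MIN;       /* would overflow (UB) in plain C */
        return num / den;
    }

    static uint32_t clz32_ref(uint32_t x)
    {
        /* Portable reference for the clz32() host utility:
         * count leading zeros, with clz32(0) == 32 by definition. */
        uint32_t count = 32;
        while (x) {
            count--;
            x >>= 1;
        }
        return count;
    }

    int main(void)
    {
        assert(arm_sdiv(INT_MIN, -1) == INT_MIN);
        assert(arm_sdiv(7, 0) == 0);
        assert(clz32_ref(0) == 32 && clz32_ref(1) == 31);
        return 0;
    }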
@@ -444,16 +578,6 @@ void do_interrupt (CPUState *env)
     env->exception_index = -1;
 }
-/* Structure used to record exclusive memory locations. */
-typedef struct mmon_state {
-    struct mmon_state *next;
-    CPUARMState *cpu_env;
-    uint32_t addr;
-} mmon_state;
-
-/* Chain of current locks. */
-static mmon_state* mmon_head = NULL;
-
 int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
                               int mmu_idx, int is_softmmu)
 {
@@ -467,82 +591,7 @@
     return 1;
 }
-static void allocate_mmon_state(CPUState *env)
-{
-    env->mmon_entry = malloc(sizeof (mmon_state));
-    memset (env->mmon_entry, 0, sizeof (mmon_state));
-    env->mmon_entry->cpu_env = env;
-    mmon_head = env->mmon_entry;
-}
-
-/* Flush any monitor locks for the specified address. */
-static void flush_mmon(uint32_t addr)
-{
-    mmon_state *mon;
-
-    for (mon = mmon_head; mon; mon = mon->next)
-    {
-        if (mon->addr != addr)
-            continue;
-
-        mon->addr = 0;
-        break;
-    }
-}
-
-/* Mark an address for exclusive access. */
-void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
-{
-    if (!env->mmon_entry)
-        allocate_mmon_state(env);
-    /* Clear any previous locks. */
-    flush_mmon(addr);
-    env->mmon_entry->addr = addr;
-}
-
-/* Test if an exclusive address is still exclusive. Returns zero
-   if the address is still exclusive. */
-uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
-{
-    int res;
-
-    if (!env->mmon_entry)
-        return 1;
-    if (env->mmon_entry->addr == addr)
-        res = 0;
-    else
-        res = 1;
-    flush_mmon(addr);
-    return res;
-}
-
-void HELPER(clrex)(CPUState *env)
-{
-    if (!(env->mmon_entry && env->mmon_entry->addr))
-        return;
-    flush_mmon(env->mmon_entry->addr);
-}
-
-target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
-{
-    return addr;
-}
-
 /* These should probably raise undefined insn exceptions. */
-void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
-{
-    int op1 = (insn >> 8) & 0xf;
-    cpu_abort(env, "cp%i insn %08x\n", op1, insn);
-    return;
-}
-
-uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
-{
-    int op1 = (insn >> 8) & 0xf;
-    cpu_abort(env, "cp%i insn %08x\n", op1, insn);
-    return 0;
-}
-
 void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
 {
     cpu_abort(env, "cp15 insn %08x\n", insn);
@@ -604,6 +653,8 @@ static inline int bank_number (int mode)
         return 4;
     case ARM_CPU_MODE_FIQ:
         return 5;
+    case ARM_CPU_MODE_SMC:
+        return 6;
     }
     cpu_abort(cpu_single_env, "Bad mode %x\n", mode);
     return -1;
@@ -670,7 +721,7 @@ static void do_v7m_exception_exit(CPUARMState *env)
     type = env->regs[15];
     if (env->v7m.exception != 0)
-        armv7m_nvic_complete_irq(env->v7m.nvic, env->v7m.exception);
+        armv7m_nvic_complete_irq(env->nvic, env->v7m.exception);
     /* Switch to the target stack. */
     switch_v7m_sp(env, (type & 4) != 0);
@@ -712,15 +763,15 @@ static void do_interrupt_v7m(CPUARMState *env)
        one we're raising. */
     switch (env->exception_index) {
     case EXCP_UDEF:
-        armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_USAGE);
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
         return;
     case EXCP_SWI:
         env->regs[15] += 2;
-        armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_SVC);
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC);
         return;
     case EXCP_PREFETCH_ABORT:
     case EXCP_DATA_ABORT:
-        armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_MEM);
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
         return;
     case EXCP_BKPT:
         if (semihosting_enabled) {
@@ -732,10 +783,10 @@ static void do_interrupt_v7m(CPUARMState *env)
                 return;
             }
         }
-        armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_DEBUG);
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG);
         return;
     case EXCP_IRQ:
-        env->v7m.exception = armv7m_nvic_acknowledge_irq(env->v7m.nvic);
+        env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic);
         break;
     case EXCP_EXCEPTION_EXIT:
         do_v7m_exception_exit(env);
@@ -859,23 +910,52 @@ void do_interrupt(CPUARMState *env)
         mask = CPSR_A | CPSR_I | CPSR_F;
         offset = 4;
         break;
+    case EXCP_SMC:
+        if (semihosting_enabled) {
+            cpu_abort(env, "SMC handling under semihosting not implemented\n");
+            return;
+        }
+        if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SMC) {
+            env->cp15.c1_secfg &= ~1;
+        }
+        offset = env->thumb ? 2 : 0;
+        new_mode = ARM_CPU_MODE_SMC;
+        addr = 0x08;
+        mask = CPSR_A | CPSR_I | CPSR_F;
+        break;
     default:
         cpu_abort(env, "Unhandled exception 0x%x\n", env->exception_index);
         return; /* Never happens. Keep compiler happy. */
     }
+    if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+        if (new_mode == ARM_CPU_MODE_SMC ||
+            (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SMC) {
+            addr += env->cp15.c12_mvbar;
+        } else {
+            if (env->cp15.c1_sys & (1 << 13)) {
+                addr += 0xffff0000;
+            } else {
+                addr += env->cp15.c12_vbar;
+            }
+        }
+    } else {
     /* High vectors. */
     if (env->cp15.c1_sys & (1 << 13)) {
         addr += 0xffff0000;
+    }
     }
     switch_mode (env, new_mode);
     env->spsr = cpsr_read(env);
     /* Clear IT bits. */
     env->condexec_bits = 0;
-    /* Switch to the new mode, and switch to Arm mode. */
-    /* ??? Thumb interrupt handlers not implemented. */
+    /* Switch to the new mode, and to the correct instruction set. */
     env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
     env->uncached_cpsr |= mask;
-    env->thumb = 0;
+    /* this is a lie, as there was no c1_sys on V4T/V5, but who cares
+     * and we should just guard the thumb mode on V4 */
+    if (arm_feature(env, ARM_FEATURE_V4T)) {
+        env->thumb = (env->cp15.c1_sys & (1 << 30)) != 0;
+    }
    env->regs[14] = env->regs[15] + offset;
    env->regs[15] = addr;
    env->interrupt_request |= CPU_INTERRUPT_EXITTB;
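Note on the exception-entry change above: with TrustZone, the vector base comes from one of three sources. A compact restatement of that selection (a sketch mirroring the code above, with inputs simplified to booleans; not part of the commit):

    /* Sketch of the vector-base selection implemented above. */
    #include <stdint.h>

    uint32_t arm_vector_address(int trustzone, int entering_monitor,
                                int high_vectors, /* SCTLR.V, bit 13 */
                                uint32_t vbar, uint32_t mvbar,
                                uint32_t offset)
    {
        if (trustzone && entering_monitor)
            return mvbar + offset;        /* monitor vector base (MVBAR) */
        if (trustzone && !high_vectors)
            return vbar + offset;         /* banked VBAR */
        if (high_vectors)
            return 0xffff0000u + offset;  /* legacy high vectors */
        return offset;                    /* low vectors at address 0 */
    }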
@@ -947,7 +1027,8 @@ static uint32_t get_level1_table_address(CPUState *env, uint32_t address)
 }
 static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
-                            int is_user, uint32_t *phys_ptr, int *prot)
+                            int is_user, uint32_t *phys_ptr, int *prot,
+                            target_ulong *page_size)
 {
     int code;
     uint32_t table;
@@ -980,6 +1061,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
         phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
         ap = (desc >> 10) & 3;
         code = 13;
+        *page_size = 1024 * 1024;
     } else {
         /* Lookup l2 entry. */
         if (type == 1) {
@@ -997,10 +1079,12 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
         case 1: /* 64k page. */
             phys_addr = (desc & 0xffff0000) | (address & 0xffff);
             ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+            *page_size = 0x10000;
             break;
         case 2: /* 4k page. */
             phys_addr = (desc & 0xfffff000) | (address & 0xfff);
             ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+            *page_size = 0x1000;
             break;
         case 3: /* 1k page. */
             if (type == 1) {
@@ -1015,6 +1099,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
                 phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
             }
             ap = (desc >> 4) & 3;
+            *page_size = 0x400;
             break;
         default:
             /* Never happens, but compiler isn't smart enough to tell. */
@@ -1027,6 +1112,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
         /* Access permission fault. */
         goto do_fault;
     }
+    *prot |= PAGE_EXEC;
     *phys_ptr = phys_addr;
     return 0;
 do_fault:
@@ -1034,7 +1120,8 @@
 }
 static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
-                            int is_user, uint32_t *phys_ptr, int *prot)
+                            int is_user, uint32_t *phys_ptr, int *prot,
+                            target_ulong *page_size)
 {
     int code;
     uint32_t table;
@@ -1050,7 +1137,7 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
     table = get_level1_table_address(env, address);
     desc = ldl_phys(table);
     type = (desc & 3);
-    if (type == 0) {
+    if (type == 0 || type == 3) {
         /* Section translation fault. */
         code = 5;
         domain = 0;
@@ -1074,9 +1161,11 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
         if (desc & (1 << 18)) {
             /* Supersection. */
             phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
+            *page_size = 0x1000000;
         } else {
             /* Section. */
             phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
+            *page_size = 0x100000;
         }
         ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
         xn = desc & (1 << 4);
@@ -1093,10 +1182,12 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
         case 1: /* 64k page. */
             phys_addr = (desc & 0xffff0000) | (address & 0xffff);
             xn = desc & (1 << 15);
+            *page_size = 0x10000;
             break;
         case 2: case 3: /* 4k page. */
             phys_addr = (desc & 0xfffff000) | (address & 0xfff);
             xn = desc & 1;
+            *page_size = 0x1000;
             break;
         default:
            /* Never happens, but compiler isn't smart enough to tell. */
@@ -1104,6 +1195,9 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
         }
         code = 15;
     }
+    if (domain == 3) {
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+    } else {
         if (xn && access_type == 2)
             goto do_fault;
@@ -1117,6 +1211,10 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
         if (!*prot) {
             /* Access permission fault. */
             goto do_fault;
+        }
+        if (!xn) {
+            *prot |= PAGE_EXEC;
+        }
     }
     *phys_ptr = phys_addr;
     return 0;
@@ -1180,12 +1278,22 @@ static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
         /* Bad permission. */
         return 1;
     }
+    *prot |= PAGE_EXEC;
     return 0;
 }
-static inline int get_phys_addr(CPUState *env, uint32_t address,
+#ifdef CONFIG_GLES2
+int get_phys_addr(CPUState *env, uint32_t address,
+                  int access_type, int is_user,
+                  uint32_t *phys_ptr, int *prot,
+                  target_ulong *page_size);
+#else
+static
+#endif
+int get_phys_addr(CPUState *env, uint32_t address,
                   int access_type, int is_user,
-                  uint32_t *phys_ptr, int *prot)
+                  uint32_t *phys_ptr, int *prot,
+                  target_ulong *page_size)
 {
     /* Fast Context Switch Extension. */
     if (address < 0x02000000)
@@ -1194,35 +1302,39 @@
     if ((env->cp15.c1_sys & 1) == 0) {
         /* MMU/MPU disabled. */
         *phys_ptr = address;
-        *prot = PAGE_READ | PAGE_WRITE;
+        *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        *page_size = TARGET_PAGE_SIZE;
         return 0;
     } else if (arm_feature(env, ARM_FEATURE_MPU)) {
+        *page_size = TARGET_PAGE_SIZE;
         return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
                                  prot);
     } else if (env->cp15.c1_sys & (1 << 23)) {
         return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
-                                prot);
+                                prot, page_size);
     } else {
         return get_phys_addr_v5(env, address, access_type, is_user, phys_ptr,
-                                prot);
+                                prot, page_size);
     }
 }
 int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
                               int access_type, int mmu_idx, int is_softmmu)
 {
-    uint32_t phys_addr = 0;
+    uint32_t phys_addr;
+    target_ulong page_size;
     int prot;
     int ret, is_user;
     is_user = mmu_idx == MMU_USER_IDX;
-    ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot);
+    ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
+                        &page_size);
     if (ret == 0) {
         /* Map a single [sub]page. */
         phys_addr &= ~(uint32_t)0x3ff;
         address &= ~(uint32_t)0x3ff;
-        return tlb_set_page (env, address, phys_addr, prot, mmu_idx,
-                             is_softmmu);
+        tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
+        return 0;
     }
     if (access_type == 2) {
@@ -1241,11 +1353,12 @@ int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
 target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
 {
-    uint32_t phys_addr = 0;
+    uint32_t phys_addr;
+    target_ulong page_size;
     int prot;
     int ret;
-    ret = get_phys_addr(env, addr, 0, 0, &phys_addr, &prot);
+    ret = get_phys_addr(env, addr, 0, 0, &phys_addr, &prot, &page_size);
     if (ret != 0)
         return -1;
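Note on the table-walk changes above: get_phys_addr() now reports the real mapping size so tlb_set_page() can install 1KB through 16MB regions instead of always faulting in tiny pages. The v5 second-level descriptor decoding above reduces to this (a standalone sketch, not part of the commit):

    /* Sketch: page sizes reported by the v5 walker above.
     * A second-level (coarse) descriptor selects its type in bits [1:0]. */
    #include <stdint.h>

    uint32_t armv5_l2_page_size(uint32_t desc)
    {
        switch (desc & 3) {
        case 1:  return 0x10000; /* 64k large page */
        case 2:  return 0x1000;  /* 4k small page */
        case 3:  return 0x400;   /* 1k tiny page */
        default: return 0;       /* type 0: translation fault */
        }
    }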
@@ -1253,49 +1366,6 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
-/* Not really implemented. Need to figure out a sane way of doing this.
-   Maybe add generic watchpoint support and use that. */
-
-void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
-{
-    env->mmon_addr = addr;
-}
-
-uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
-{
-    return (env->mmon_addr != addr);
-}
-
-void HELPER(clrex)(CPUState *env)
-{
-    env->mmon_addr = -1;
-}
-
-void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
-{
-    int cp_num = (insn >> 8) & 0xf;
-    int cp_info = (insn >> 5) & 7;
-    int src = (insn >> 16) & 0xf;
-    int operand = insn & 0xf;
-
-    if (env->cp[cp_num].cp_write)
-        env->cp[cp_num].cp_write(env->cp[cp_num].opaque,
-                                 cp_info, src, operand, val);
-}
-
-uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
-{
-    int cp_num = (insn >> 8) & 0xf;
-    int cp_info = (insn >> 5) & 7;
-    int dest = (insn >> 16) & 0xf;
-    int operand = insn & 0xf;
-
-    if (env->cp[cp_num].cp_read)
-        return env->cp[cp_num].cp_read(env->cp[cp_num].opaque,
-                                       cp_info, dest, operand);
-    return 0;
-}
-
 /* Return basic MPU access permission bits. */
 static uint32_t simple_mpu_ap_bits(uint32_t val)
 {
@@ -1349,17 +1419,19 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
         }
         goto bad_reg;
     case 1: /* System configuration. */
+        switch (crm) {
+        case 0:
             if (arm_feature(env, ARM_FEATURE_OMAPCP))
                 op2 = 0;
             switch (op2) {
             case 0:
-                if (!arm_feature(env, ARM_FEATURE_XSCALE) || crm == 0)
+                if (!arm_feature(env, ARM_FEATURE_XSCALE))
                     env->cp15.c1_sys = val;
                 /* ??? Lots of these bits are not implemented. */
                 /* This may enable/disable the MMU, so do a TLB flush. */
                 tlb_flush(env, 1);
                 break;
-            case 1: /* Auxiliary control register. */
+            case 1: /* Auxiliary control register. */
                 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
                     env->cp15.c1_xscaleauxcr = val;
                     break;
@@ -1373,6 +1445,34 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
                     env->cp15.c1_coproc = val;
                     /* ??? Is this safe when called from within a TB? */
                     tb_flush(env);
+                }
+                break;
+            default:
+                goto bad_reg;
+            }
+            break;
+        case 1:
+            if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)
+                || (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+                goto bad_reg;
+            switch (op2) {
+            case 0: /* Secure configuration register. */
+                if (env->cp15.c1_secfg & 1)
+                    goto bad_reg;
+                env->cp15.c1_secfg = val;
+                break;
+            case 1: /* Secure debug enable register. */
+                if (env->cp15.c1_secfg & 1)
+                    goto bad_reg;
+                env->cp15.c1_sedbg = val;
+                break;
+            case 2: /* Nonsecure access control register. */
+                if (env->cp15.c1_secfg & 1)
+                    goto bad_reg;
+                env->cp15.c1_nseac = val;
+                break;
+            default:
+                goto bad_reg;
             }
             break;
         default:
@@ -1468,8 +1568,49 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
     case 7: /* Cache control. */
         env->cp15.c15_i_max = 0x000;
         env->cp15.c15_i_min = 0xff0;
-        /* No cache, so nothing to do. */
-        /* ??? MPCore has VA to PA translation functions. */
+        if (op1 != 0) {
+            goto bad_reg;
+        }
+        /* No cache, so nothing to do except VA->PA translations. */
+        if (arm_feature(env, ARM_FEATURE_V6K)) {
+            switch (crm) {
+            case 4:
+                if (arm_feature(env, ARM_FEATURE_V7)) {
+                    env->cp15.c7_par = val & 0xfffff6ff;
+                } else {
+                    env->cp15.c7_par = val & 0xfffff1ff;
+                }
+                break;
+            case 8: {
+                uint32_t phys_addr;
+                target_ulong page_size;
+                int prot;
+                int ret, is_user = op2 & 2;
+                int access_type = op2 & 1;
+
+                if (op2 & 4) {
+                    /* Other states are only available with TrustZone */
+                    goto bad_reg;
+                }
+                ret = get_phys_addr(env, val, access_type, is_user,
+                                    &phys_addr, &prot, &page_size);
+                if (ret == 0) {
+                    /* We do not set any attribute bits in the PAR */
+                    if (page_size == (1 << 24)
+                        && arm_feature(env, ARM_FEATURE_V7)) {
+                        env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+                    } else {
+                        env->cp15.c7_par = phys_addr & 0xfffff000;
+                    }
+                } else {
+                    env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
+                                       ((ret & (12 << 1)) >> 6) |
                                       ((ret & 0xf) << 1) | 1;
+                }
+                break;
+            }
+            }
+        }
         break;
     case 8: /* MMU TLB control. */
         switch (op2) {
@@ -1477,18 +1618,7 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
             tlb_flush(env, 0);
             break;
         case 1: /* Invalidate single TLB entry. */
-#if 0
-            /* ??? This is wrong for large pages and sections. */
-            /* As an ugly hack to make linux work we always flush a 4K
-               pages. */
-            val &= 0xfffff000;
-            tlb_flush_page(env, val);
-            tlb_flush_page(env, val + 0x400);
-            tlb_flush_page(env, val + 0x800);
-            tlb_flush_page(env, val + 0xc00);
-#else
-            tlb_flush(env, 1);
-#endif
+            tlb_flush_page(env, val & TARGET_PAGE_MASK);
             break;
         case 2: /* Invalidate on ASID. */
             tlb_flush(env, val == 0);
@@ -1504,6 +1634,8 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
     case 9:
         if (arm_feature(env, ARM_FEATURE_OMAPCP))
             break;
+        if (arm_feature(env, ARM_FEATURE_STRONGARM))
+            break; /* Ignore ReadBuffer access */
         switch (crm) {
        case 0: /* Cache lockdown. */
            switch (op1) {
@@ -1520,15 +1652,86 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
                 }
                 break;
             case 1: /* L2 cache. */
-                /* Ignore writes to L2 lockdown/auxiliary registers. */
+                switch (op2) {
+                case 0: /* L2 cache lockdown */
+                case 2: /* L2 cache auxiliary control */
+                    /* ignore */
+                    break;
+                default:
+                    goto bad_reg;
+                }
                 break;
             default:
                 goto bad_reg;
             }
             break;
         case 1: /* TCM memory region registers. */
+        case 2:
             /* Not implemented. */
             goto bad_reg;
+        case 12: /* performance monitor control */
+            if (arm_feature(env, ARM_FEATURE_V7)) {
+                switch (op2) {
+                case 0: /* performance monitor control */
+                    env->cp15.c9_pmcr_data = val;
+                    break;
+                case 1: /* count enable set */
+                case 2: /* count enable clear */
+                case 3: /* overflow flag status */
+                case 4: /* software increment */
+                case 5: /* performance counter selection */
+                    /* not implemented */
+                    goto bad_reg;
+                default:
+                    goto bad_reg;
+                }
+            } else {
+                goto bad_reg;
+            }
+            break;
+        case 13: /* performance counters */
+            if (arm_feature(env, ARM_FEATURE_V7)) {
+                switch (op2) {
+                case 0: /* cycle count */
+                case 1: /* event selection */
+                case 2: /* performance monitor count */
+                    /* not implemented */
+                    goto bad_reg;
+                default:
+                    goto bad_reg;
+                }
+            } else {
+                goto bad_reg;
+            }
+            break;
+        case 14: /* performance monitor control */
+            if (arm_feature(env, ARM_FEATURE_V7)) {
+                switch (op2) {
+                case 0: /* user enable */
+                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+                        goto bad_reg;
+                    }
+                    env->cp15.c9_useren = val & 1;
+                    break;
+                case 1: /* interrupt enable set */
+                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+                        goto bad_reg;
+                    }
+                    env->cp15.c9_inten |= val & 0xf;
+                    break;
+                case 2: /* interrupt enable clear */
+                    if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+                        goto bad_reg;
+                    }
+                    env->cp15.c9_inten &= ~(val & 0xf);
+                    break;
+                default:
+                    goto bad_reg;
+                }
+            } else {
+                goto bad_reg;
+            }
+            break;
         default:
             goto bad_reg;
         }
@@ -1537,6 +1740,27 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
         /* ??? TLB lockdown not implemented. */
         break;
     case 12: /* Reserved. */
+        if (!op1 && !crm) {
+            switch (op2) {
+            case 0:
+                if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+                    goto bad_reg;
+                }
+                env->cp15.c12_vbar = val & ~0x1f;
+                break;
+            case 1:
+                if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+                    goto bad_reg;
+                }
+                if (!(env->cp15.c1_secfg & 1)) {
+                    env->cp15.c12_mvbar = val & ~0x1f;
+                }
+                break;
+            default:
+                goto bad_reg;
+            }
+            break;
+        }
         goto bad_reg;
     case 13: /* Process ID. */
         switch (op2) {
@@ -1555,15 +1779,6 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
             tlb_flush(env, 0);
             env->cp15.c13_context = val;
             break;
-        case 2:
-            env->cp15.c13_tls1 = val;
-            break;
-        case 3:
-            env->cp15.c13_tls2 = val;
-            break;
-        case 4:
-            env->cp15.c13_tls3 = val;
-            break;
         default:
            goto bad_reg;
         }
         break;
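Note for readers following the cp15 dispatch above: every branch keys off fields of the original MCR/MRC instruction word, which the helpers extract inline. The standard ARM coprocessor encoding unpacks as follows (reference sketch, not part of the commit):

    /* Field extraction for an ARM MCR/MRC instruction word. */
    #include <stdint.h>

    struct cp_insn {
        unsigned op1, crn, rt, cp_num, op2, crm;
    };

    struct cp_insn decode_cp_insn(uint32_t insn)
    {
        struct cp_insn d;
        d.op1    = (insn >> 21) & 7;   /* opcode1 */
        d.crn    = (insn >> 16) & 0xf; /* primary coprocessor register */
        d.rt     = (insn >> 12) & 0xf; /* ARM source/dest register */
        d.cp_num = (insn >>  8) & 0xf; /* coprocessor number, 15 for cp15 */
        d.op2    = (insn >>  5) & 7;   /* opcode2 */
        d.crm    = insn & 0xf;         /* secondary coprocessor register */
        return d;
    }

This is the same decoding visible in the removed set_cp()/get_cp() helpers, and it explains idioms like the `((insn >> 12) & 0xf) == 0xf` test above, which checks whether the destination register is r15.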
@@ -1640,8 +1855,28 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
             return 0;
         case 3: /* TLB type register. */
             return 0; /* No lockable TLB entries. */
-        case 5: /* CPU ID */
-            return env->cpu_index;
+        case 5: /* MPIDR */
+            /* The MPIDR was standardised in v7; prior to
+             * this it was implemented only in the 11MPCore.
+             * For all other pre-v7 cores it does not exist.
+             */
+            if (arm_feature(env, ARM_FEATURE_V7) ||
+                ARM_CPUID(env) == ARM_CPUID_ARM11MPCORE) {
+                int mpidr = env->cpu_index;
+                /* We don't support setting cluster ID ([8..11])
+                 * so these bits always RAZ.
+                 */
+                if (arm_feature(env, ARM_FEATURE_V7MP)) {
+                    mpidr |= (1 << 31);
+                    /* Cores which are uniprocessor (non-coherent)
+                     * but still implement the MP extensions set
+                     * bit 30. (For instance, A9UP.) However we do
+                     * not currently model any of those cores.
+                     */
+                }
+                return mpidr;
+            }
+            /* otherwise fall through to the unimplemented-reg case */
         default:
             goto bad_reg;
         }
@@ -1658,6 +1893,7 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
         default:
             goto bad_reg;
         }
+        break;
     case 1:
         /* These registers aren't documented on arm11 cores. However
            Linux looks at them anyway. */
@@ -1684,7 +1920,10 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
         default:
             goto bad_reg;
         }
+        break;
     case 1: /* System configuration. */
+        switch (crm) {
+        case 0:
             if (arm_feature(env, ARM_FEATURE_OMAPCP))
                 op2 = 0;
             switch (op2) {
@@ -1704,10 +1943,14 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
                 case ARM_CPUID_ARM11MPCORE:
                     return 1;
                 case ARM_CPUID_CORTEXA8:
+                case ARM_CPUID_CORTEXA8_R2:
                     return 2;
+                case ARM_CPUID_CORTEXA9:
+                    return 0;
                 default:
                     goto bad_reg;
                 }
+                break;
             case 2: /* Coprocessor access register. */
                 if (arm_feature(env, ARM_FEATURE_XSCALE))
                     goto bad_reg;
@@ -1715,6 +1958,30 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
             default:
                 goto bad_reg;
             }
+            break;
+        case 1:
+            if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)
+                || (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+                goto bad_reg;
+            switch (op2) {
+            case 0: /* Secure configuration register. */
+                if (env->cp15.c1_secfg & 1)
+                    goto bad_reg;
+                return env->cp15.c1_secfg;
+            case 1: /* Secure debug enable register. */
+                if (env->cp15.c1_secfg & 1)
+                    goto bad_reg;
+                return env->cp15.c1_sedbg;
+            case 2: /* Nonsecure access control register. */
+                return env->cp15.c1_nseac;
+            default:
+                goto bad_reg;
+            }
+            break;
+        default:
+            goto bad_reg;
+        }
+        break;
     case 2: /* MMU Page table control / MPU cache control. */
         if (arm_feature(env, ARM_FEATURE_MPU)) {
             switch (op2) {
@@ -1781,34 +2048,37 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
             if (arm_feature(env, ARM_FEATURE_V6)) {
                 /* Watchpoint Fault Address. */
                 return 0; /* Not implemented. */
-            } else {
+            }
             /* Instruction Fault Address. */
             /* Arm9 doesn't have an IFAR, but implementing it anyway
                shouldn't do any harm. */
             return env->cp15.c6_insn;
-            }
         case 2:
             if (arm_feature(env, ARM_FEATURE_V6)) {
                 /* Instruction Fault Address. */
                 return env->cp15.c6_insn;
-            } else {
-                goto bad_reg;
             }
+            goto bad_reg;
         default:
             goto bad_reg;
         }
     }
     case 7: /* Cache control. */
-        /* FIXME: Should only clear Z flag if destination is r15. */
+        if (crm == 4 && op1 == 0 && op2 == 0) {
+            return env->cp15.c7_par;
+        }
+        if (((insn >> 12) & 0xf) == 0xf) /* clear ZF only if destination is r15 */
             env->ZF = 0;
         return 0;
     case 8: /* MMU TLB control. */
         goto bad_reg;
     case 9: /* Cache lockdown. */
         switch (op1) {
-        case 0: /* L1 cache. */
+        case 0:
             if (arm_feature(env, ARM_FEATURE_OMAPCP))
                 return 0;
+            switch (crm) {
+            case 0: /* L1 cache */
                 switch (op2) {
                 case 0:
                     return env->cp15.c9_data;
@@ -1817,6 +2087,37 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
                 default:
                     goto bad_reg;
                 }
+                break;
+            case 12:
+                switch (op2) {
+                case 0:
+                    return env->cp15.c9_pmcr_data;
+                default:
+                    goto bad_reg;
+                }
+                break;
+            case 14: /* performance monitor control */
+                if (arm_feature(env, ARM_FEATURE_V7)) {
+                    switch (op2) {
+                    case 0: /* user enable */
+                        return env->cp15.c9_useren;
+                    case 1: /* interrupt enable set */
+                    case 2: /* interrupt enable clear */
+                        if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+                            goto bad_reg;
+                        }
+                        return env->cp15.c9_inten;
+                    default:
+                        goto bad_reg;
+                    }
+                } else {
+                    goto bad_reg;
+                }
+                break;
+            default:
+                goto bad_reg;
+            }
+            break;
         case 1: /* L2 cache */
             if (crm != 0)
                 goto bad_reg;
@@ -1830,6 +2131,22 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
         return 0;
     case 11: /* TCM DMA control. */
     case 12: /* Reserved. */
+        if (!op1 && !crm) {
+            switch (op2) {
+            case 0: /* secure or nonsecure vector base address */
+                if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+                    return env->cp15.c12_vbar;
+                }
+                break;
+            case 1: /* monitor vector base address */
+                if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+                    return env->cp15.c12_mvbar;
+                }
+                break;
+            default:
+                break;
+            }
+        }
         goto bad_reg;
     case 13: /* Process ID. */
         switch (op2) {
@@ -1837,12 +2154,6 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
         case 0:
             return env->cp15.c13_fcse;
         case 1:
             return env->cp15.c13_context;
-        case 2:
-            return env->cp15.c13_tls1;
-        case 3:
-            return env->cp15.c13_tls2;
-        case 4:
-            return env->cp15.c13_tls3;
         default:
             goto bad_reg;
         }
@@ -1887,12 +2198,20 @@ bad_reg:
 void HELPER(set_r13_banked)(CPUState *env, uint32_t mode, uint32_t val)
 {
+    if ((env->uncached_cpsr & CPSR_M) == mode) {
+        env->regs[13] = val;
+    } else {
         env->banked_r13[bank_number(mode)] = val;
     }
+}
 uint32_t HELPER(get_r13_banked)(CPUState *env, uint32_t mode)
 {
+    if ((env->uncached_cpsr & CPSR_M) == mode) {
+        return env->regs[13];
+    } else {
         return env->banked_r13[bank_number(mode)];
+    }
 }
 uint32_t HELPER(v7m_mrs)(CPUState *env, uint32_t reg)
@@ -2101,7 +2420,7 @@
 static inline uint16_t add16_usat(uint16_t a, uint16_t b)
 static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
 {
-    if (a < b)
+    if (a > b)
         return a - b;
     else
         return 0;
@@ -2118,7 +2437,7 @@
 static inline uint8_t add8_usat(uint8_t a, uint8_t b)
 static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
 {
-    if (a < b)
+    if (a > b)
         return a - b;
     else
         return 0;
@@ -2135,7 +2454,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
 /* Signed modulo arithmetic. */
 #define SARITH16(a, b, n, op) do { \
     int32_t sum; \
-    sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \
+    sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
     RESULT(sum, n, 16); \
     if (sum >= 0) \
         ge |= 3 << (n * 2); \
@@ -2143,7 +2462,7 @@
 #define SARITH8(a, b, n, op) do { \
     int32_t sum; \
-    sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \
+    sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
     RESULT(sum, n, 8); \
     if (sum >= 0) \
         ge |= 1 << n; \
@@ -2279,10 +2598,12 @@ static inline int vfp_exceptbits_from_host(int host_bits)
         target_bits |= 2;
     if (host_bits & float_flag_overflow)
         target_bits |= 4;
-    if (host_bits & float_flag_underflow)
+    if (host_bits & (float_flag_underflow | float_flag_output_denormal))
         target_bits |= 8;
     if (host_bits & float_flag_inexact)
         target_bits |= 0x10;
+    if (host_bits & float_flag_input_denormal)
+        target_bits |= 0x80;
     return target_bits;
 }
@@ -2295,10 +2616,16 @@ uint32_t HELPER(vfp_get_fpscr)(CPUState *env)
             | (env->vfp.vec_len << 16)
             | (env->vfp.vec_stride << 20);
     i = get_float_exception_flags(&env->vfp.fp_status);
+    i |= get_float_exception_flags(&env->vfp.standard_fp_status);
     fpscr |= vfp_exceptbits_from_host(i);
     return fpscr;
 }
+uint32_t vfp_get_fpscr(CPUState *env)
+{
+    return HELPER(vfp_get_fpscr)(env);
+}
+
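Note on the vfp_get_fpscr() change above: per the cpu.h comment earlier in this patch, the cumulative FPSCR exception flags are defined as the OR of the flags held in the two float_status structures, so nothing ever needs to copy flags between them. A toy illustration of that merge rule (flag bit values are illustrative, not softfloat's):

    /* Toy model of "cumulative flags are the OR of both statuses". */
    #include <assert.h>

    struct toy_status { int exception_flags; };

    enum { FLAG_INVALID = 1, FLAG_INEXACT = 2 };

    static int fpscr_cumulative(const struct toy_status *normal,
                                const struct toy_status *standard)
    {
        /* An FPSCR read simply merges both sets of flags, as
         * vfp_get_fpscr() does above. */
        return normal->exception_flags | standard->exception_flags;
    }

    int main(void)
    {
        struct toy_status vfp  = { FLAG_INVALID }; /* set by a VFP op */
        struct toy_status neon = { FLAG_INEXACT }; /* set by a Neon op */
        assert(fpscr_cumulative(&vfp, &neon) == (FLAG_INVALID | FLAG_INEXACT));
        return 0;
    }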
 /* Convert vfp exception flags to target form. */
 static inline int vfp_exceptbits_to_host(int target_bits)
 {
@@ -2314,6 +2641,8 @@ static inline int vfp_exceptbits_to_host(int target_bits)
         host_bits |= float_flag_underflow;
     if (target_bits & 0x10)
         host_bits |= float_flag_inexact;
+    if (target_bits & 0x80)
+        host_bits |= float_flag_input_denormal;
     return host_bits;
 }
@@ -2346,13 +2675,21 @@ void HELPER(vfp_set_fpscr)(CPUState *env, uint32_t val)
         }
         set_float_rounding_mode(i, &env->vfp.fp_status);
     }
-    if (changed & (1 << 24))
+    if (changed & (1 << 24)) {
         set_flush_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
+        set_flush_inputs_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
+    }
     if (changed & (1 << 25))
         set_default_nan_mode((val & (1 << 25)) != 0, &env->vfp.fp_status);
-    i = vfp_exceptbits_to_host((val >> 8) & 0x1f);
+    i = vfp_exceptbits_to_host(val);
     set_float_exception_flags(i, &env->vfp.fp_status);
+    set_float_exception_flags(0, &env->vfp.standard_fp_status);
+}
+
+void vfp_set_fpscr(CPUState *env, uint32_t val)
+{
+    HELPER(vfp_set_fpscr)(env, val);
 }
 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
@@ -2432,203 +2769,384 @@
 DO_VFP_cmp(s, float32)
 DO_VFP_cmp(d, float64)
 #undef DO_VFP_cmp
-/* Helper routines to perform bitwise copies between float and int. */
-static inline float32 vfp_itos(uint32_t i)
-{
-    union {
-        uint32_t i;
-        float32 s;
-    } v;
+/* Integer to float and float to integer conversions */
-    v.i = i;
-    return v.s;
+#define CONV_ITOF(name, fsz, sign) \
+    float##fsz HELPER(name)(uint32_t x, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    return sign##int32_to_##float##fsz(x, fpst); \
 }
-static inline uint32_t vfp_stoi(float32 s)
-{
-    union {
-        uint32_t i;
-        float32 s;
-    } v;
-
-    v.s = s;
-    return v.i;
+#define CONV_FTOI(name, fsz, sign, round) \
+uint32_t HELPER(name)(float##fsz x, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    if (float##fsz##_is_any_nan(x)) { \
+        float_raise(float_flag_invalid, fpst); \
+        return 0; \
+    } \
+    return float##fsz##_to_##sign##int32##round(x, fpst); \
 }
-static inline float64 vfp_itod(uint64_t i)
-{
-    union {
-        uint64_t i;
-        float64 d;
-    } v;
-
-    v.i = i;
-    return v.d;
-}
+#define FLOAT_CONVS(name, p, fsz, sign) \
+CONV_ITOF(vfp_##name##to##p, fsz, sign) \
+CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
+CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
-static inline uint64_t vfp_dtoi(float64 d)
-{
-    union {
-        uint64_t i;
-        float64 d;
-    } v;
+FLOAT_CONVS(si, s, 32, )
+FLOAT_CONVS(si, d, 64, )
+FLOAT_CONVS(ui, s, 32, u)
+FLOAT_CONVS(ui, d, 64, u)
-    v.d = d;
-    return v.i;
-}
+#undef CONV_ITOF
+#undef CONV_FTOI
+#undef FLOAT_CONVS
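Note: to see what the conversion macros above generate, hand-expanding FLOAT_CONVS(si, s, 32, ) yields three helpers; its round-to-zero variant, CONV_FTOI(vfp_tosizs, 32, , _round_to_zero), expands to (whitespace added):

    /* Hand expansion of one instance of the FLOAT_CONVS macro above. */
    uint32_t HELPER(vfp_tosizs)(float32 x, void *fpstp)
    {
        float_status *fpst = fpstp;
        if (float32_is_any_nan(x)) {
            float_raise(float_flag_invalid, fpst);
            return 0; /* NaN converts to 0 with InvalidOp, per ARM */
        }
        return float32_to_int32_round_to_zero(x, fpst);
    }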
-/* Integer to float conversion. */
-float32 VFP_HELPER(uito, s)(float32 x, CPUState *env)
+/* floating point conversion */
+float64 VFP_HELPER(fcvtd, s)(float32 x, CPUState *env)
 {
-    return uint32_to_float32(vfp_stoi(x), &env->vfp.fp_status);
+    float64 r = float32_to_float64(x, &env->vfp.fp_status);
+    /* ARM requires that S<->D conversion of any kind of NaN generates
+     * a quiet NaN by forcing the most significant frac bit to 1.
+     */
+    return float64_maybe_silence_nan(r);
 }
-float64 VFP_HELPER(uito, d)(float32 x, CPUState *env)
+float32 VFP_HELPER(fcvts, d)(float64 x, CPUState *env)
 {
-    return uint32_to_float64(vfp_stoi(x), &env->vfp.fp_status);
+    float32 r = float64_to_float32(x, &env->vfp.fp_status);
+    /* ARM requires that S<->D conversion of any kind of NaN generates
+     * a quiet NaN by forcing the most significant frac bit to 1.
+     */
+    return float32_maybe_silence_nan(r);
 }
-float32 VFP_HELPER(sito, s)(float32 x, CPUState *env)
-{
-    return int32_to_float32(vfp_stoi(x), &env->vfp.fp_status);
-}
+/* VFP3 fixed point conversion. */
+#define VFP_CONV_FIX(name, p, fsz, itype, sign) \
+float##fsz HELPER(vfp_##name##to##p)(uint##fsz##_t x, uint32_t shift, \
+                                     void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    float##fsz tmp; \
+    tmp = sign##int32_to_##float##fsz((itype##_t)x, fpst); \
+    return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
+} \
+uint##fsz##_t HELPER(vfp_to##name##p)(float##fsz x, uint32_t shift, \
+                                      void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    float##fsz tmp; \
+    if (float##fsz##_is_any_nan(x)) { \
+        float_raise(float_flag_invalid, fpst); \
+        return 0; \
+    } \
+    tmp = float##fsz##_scalbn(x, shift, fpst); \
+    return float##fsz##_to_##itype##_round_to_zero(tmp, fpst); \
+}
+
+VFP_CONV_FIX(sh, d, 64, int16, )
+VFP_CONV_FIX(sl, d, 64, int32, )
+VFP_CONV_FIX(uh, d, 64, uint16, u)
+VFP_CONV_FIX(ul, d, 64, uint32, u)
+VFP_CONV_FIX(sh, s, 32, int16, )
+VFP_CONV_FIX(sl, s, 32, int32, )
+VFP_CONV_FIX(uh, s, 32, uint16, u)
+VFP_CONV_FIX(ul, s, 32, uint32, u)
+#undef VFP_CONV_FIX
-float64 VFP_HELPER(sito, d)(float32 x, CPUState *env)
+/* Half precision conversions. */
+static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
 {
-    return int32_to_float64(vfp_stoi(x), &env->vfp.fp_status);
+    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+    float32 r = float16_to_float32(make_float16(a), ieee, s);
+    if (ieee) {
+        return float32_maybe_silence_nan(r);
+    }
+    return r;
 }
-/* Float to integer conversion. */
-float32 VFP_HELPER(toui, s)(float32 x, CPUState *env)
+static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
 {
-    return vfp_itos(float32_to_uint32(x, &env->vfp.fp_status));
+    int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+    float16 r = float32_to_float16(a, ieee, s);
+    if (ieee) {
+        r = float16_maybe_silence_nan(r);
+    }
+    return float16_val(r);
 }
-float32 VFP_HELPER(toui, d)(float64 x, CPUState *env)
+float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
 {
-    return vfp_itos(float64_to_uint32(x, &env->vfp.fp_status));
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
 }
-float32 VFP_HELPER(tosi, s)(float32 x, CPUState *env)
+uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUState *env)
 {
-    return vfp_itos(float32_to_int32(x, &env->vfp.fp_status));
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
 }
-float32 VFP_HELPER(tosi, d)(float64 x, CPUState *env)
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
 {
-    return vfp_itos(float64_to_int32(x, &env->vfp.fp_status));
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
 }
-float32 VFP_HELPER(touiz, s)(float32 x, CPUState *env)
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
 {
-    return vfp_itos(float32_to_uint32_round_to_zero(x, &env->vfp.fp_status));
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
 }
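Note on the half-precision helpers above: the ieee flag is derived from FPSCR.AHP (bit 26). When AHP is clear the conversion uses IEEE 754 half precision; when set it uses ARM's alternative half-precision format, which has no infinities or NaN encodings. A sketch of just that selection:

    /* Sketch: how the ieee flag above is derived from FPSCR.AHP. */
    #include <stdint.h>

    int use_ieee_half(uint32_t fpscr)
    {
        /* AHP is FPSCR bit 26; 0 selects IEEE 754 half precision,
         * 1 selects the ARM alternative format. */
        return (fpscr & (1u << 26)) == 0;
    }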
&env->vfp.standard_fp_status; + if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || + (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { + if (!(float32_is_zero(a) || float32_is_zero(b))) { + float_raise(float_flag_input_denormal, s); + } + return float32_two; + } + return float32_sub(float32_two, float32_mul(a, b, s), s); } -float32 VFP_HELPER(tosiz, d)(float64 x, CPUState *env) +float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env) { - return vfp_itos(float64_to_int32_round_to_zero(x, &env->vfp.fp_status)); + float_status *s = &env->vfp.standard_fp_status; + float32 product; + if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || + (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { + if (!(float32_is_zero(a) || float32_is_zero(b))) { + float_raise(float_flag_input_denormal, s); + } + return float32_one_point_five; + } + product = float32_mul(a, b, s); + return float32_div(float32_sub(float32_three, product, s), float32_two, s); } -/* floating point conversion */ -float64 VFP_HELPER(fcvtd, s)(float32 x, CPUState *env) +/* NEON helpers. */ + +/* Constants 256 and 512 are used in some helpers; we avoid relying on + * int->float conversions at run-time. */ +#define float64_256 make_float64(0x4070000000000000LL) +#define float64_512 make_float64(0x4080000000000000LL) + +/* The algorithm that must be used to calculate the estimate + * is specified by the ARM ARM. + */ +static float64 recip_estimate(float64 a, CPUState *env) { - return float32_to_float64(x, &env->vfp.fp_status); + /* These calculations mustn't set any fp exception flags, + * so we use a local copy of the fp_status. + */ + float_status dummy_status = env->vfp.standard_fp_status; + float_status *s = &dummy_status; + /* q = (int)(a * 512.0) */ + float64 q = float64_mul(float64_512, a, s); + int64_t q_int = float64_to_int64_round_to_zero(q, s); + + /* r = 1.0 / (((double)q + 0.5) / 512.0) */ + q = int64_to_float64(q_int, s); + q = float64_add(q, float64_half, s); + q = float64_div(q, float64_512, s); + q = float64_div(float64_one, q, s); + + /* s = (int)(256.0 * r + 0.5) */ + q = float64_mul(q, float64_256, s); + q = float64_add(q, float64_half, s); + q_int = float64_to_int64_round_to_zero(q, s); + + /* return (double)s / 256.0 */ + return float64_div(int64_to_float64(q_int, s), float64_256, s); } -float32 VFP_HELPER(fcvts, d)(float64 x, CPUState *env) +float32 HELPER(recpe_f32)(float32 a, CPUState *env) { - return float64_to_float32(x, &env->vfp.fp_status); -} + float_status *s = &env->vfp.standard_fp_status; + float64 f64; + uint32_t val32 = float32_val(a); -/* VFP3 fixed point conversion. 
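Illustrative note (not part of the patch): recps_f32 and rsqrts_f32 compute the correction factors of the Newton-Raphson iterations x' = x*(2 - a*x) for 1/a and x' = x*((3 - a*x*x)/2) for 1/sqrt(a), plus the ARM-mandated special results for the 0 * infinity cases. One refinement step using the recps factor would look like this (name ours):

/* one Newton-Raphson step towards 1/a, given an estimate x */
static float example_recip_step(float a, float x)
{
    return x * (2.0f - a * x);
}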
*/ -#define VFP_CONV_FIX(name, p, ftype, itype, sign) \ -ftype VFP_HELPER(name##to, p)(ftype x, uint32_t shift, CPUState *env) \ -{ \ - ftype tmp; \ - tmp = sign##int32_to_##ftype ((itype)vfp_##p##toi(x), \ - &env->vfp.fp_status); \ - return ftype##_scalbn(tmp, -(int)shift, &env->vfp.fp_status); \ -} \ -ftype VFP_HELPER(to##name, p)(ftype x, uint32_t shift, CPUState *env) \ -{ \ - ftype tmp; \ - tmp = ftype##_scalbn(x, shift, &env->vfp.fp_status); \ - return vfp_ito##p((itype)ftype##_to_##sign##int32_round_to_zero(tmp, \ - &env->vfp.fp_status)); \ -} - -VFP_CONV_FIX(sh, d, float64, int16, ) -VFP_CONV_FIX(sl, d, float64, int32, ) -VFP_CONV_FIX(uh, d, float64, uint16, u) -VFP_CONV_FIX(ul, d, float64, uint32, u) -VFP_CONV_FIX(sh, s, float32, int16, ) -VFP_CONV_FIX(sl, s, float32, int32, ) -VFP_CONV_FIX(uh, s, float32, uint16, u) -VFP_CONV_FIX(ul, s, float32, uint32, u) -#undef VFP_CONV_FIX + int result_exp; + int a_exp = (val32 & 0x7f800000) >> 23; + int sign = val32 & 0x80000000; -float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env) -{ - float_status *s = &env->vfp.fp_status; - float32 two = int32_to_float32(2, s); - return float32_sub(two, float32_mul(a, b, s), s); + if (float32_is_any_nan(a)) { + if (float32_is_signaling_nan(a)) { + float_raise(float_flag_invalid, s); + } + return float32_default_nan; + } else if (float32_is_infinity(a)) { + return float32_set_sign(float32_zero, float32_is_neg(a)); + } else if (float32_is_zero_or_denormal(a)) { + if (!float32_is_zero(a)) { + float_raise(float_flag_input_denormal, s); + } + float_raise(float_flag_divbyzero, s); + return float32_set_sign(float32_infinity, float32_is_neg(a)); + } else if (a_exp >= 253) { + float_raise(float_flag_underflow, s); + return float32_set_sign(float32_zero, float32_is_neg(a)); + } + + f64 = make_float64((0x3feULL << 52) + | ((int64_t)(val32 & 0x7fffff) << 29)); + + result_exp = 253 - a_exp; + + f64 = recip_estimate(f64, env); + + val32 = sign + | ((result_exp & 0xff) << 23) + | ((float64_val(f64) >> 29) & 0x7fffff); + return make_float32(val32); } -float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env) +/* The algorithm that must be used to calculate the estimate + * is specified by the ARM ARM. + */ +static float64 recip_sqrt_estimate(float64 a, CPUState *env) { - float_status *s = &env->vfp.fp_status; - float32 three = int32_to_float32(3, s); - return float32_sub(three, float32_mul(a, b, s), s); -} + /* These calculations mustn't set any fp exception flags, + * so we use a local copy of the fp_status. + */ + float_status dummy_status = env->vfp.standard_fp_status; + float_status *s = &dummy_status; + float64 q; + int64_t q_int; -/* NEON helpers. */ + if (float64_lt(a, float64_half, s)) { + /* range 0.25 <= a < 0.5 */ -/* TODO: The architecture specifies the value that the estimate functions - should return. We return the exact reciprocal/root instead. 
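Illustrative note (not part of the patch): recip_estimate() transcribes the ARM ARM estimate algorithm using softfloat arithmetic on a scratch float_status so that no exception flags leak out. Stated in plain doubles, for an operand fraction normalised into [0.5, 1.0), it is simply (name ours):

static double example_recip_estimate(double a)
{
    int q = (int)(a * 512.0);                     /* a in units of 1/512 */
    double r = 1.0 / (((double)q + 0.5) / 512.0); /* mid-point reciprocal */
    int s = (int)(256.0 * r + 0.5);               /* round to 1/256 units */
    return (double)s / 256.0;
}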
*/
-float32 HELPER(recpe_f32)(float32 a, CPUState *env)
-{
-    float_status *s = &env->vfp.fp_status;
-    float32 one = int32_to_float32(1, s);
-    return float32_div(one, a, s);
+        /* a in units of 1/512 rounded down */
+        /* q0 = (int)(a * 512.0); */
+        q = float64_mul(float64_512, a, s);
+        q_int = float64_to_int64_round_to_zero(q, s);
+
+        /* reciprocal root r */
+        /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); */
+        q = int64_to_float64(q_int, s);
+        q = float64_add(q, float64_half, s);
+        q = float64_div(q, float64_512, s);
+        q = float64_sqrt(q, s);
+        q = float64_div(float64_one, q, s);
+    } else {
+        /* range 0.5 <= a < 1.0 */
+
+        /* a in units of 1/256 rounded down */
+        /* q1 = (int)(a * 256.0); */
+        q = float64_mul(float64_256, a, s);
+        q_int = float64_to_int64_round_to_zero(q, s);
+
+        /* reciprocal root r */
+        /* r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0); */
+        q = int64_to_float64(q_int, s);
+        q = float64_add(q, float64_half, s);
+        q = float64_div(q, float64_256, s);
+        q = float64_sqrt(q, s);
+        q = float64_div(float64_one, q, s);
+    }
+    /* r in units of 1/256 rounded to nearest */
+    /* s = (int)(256.0 * r + 0.5); */
+
+    q = float64_mul(q, float64_256, s);
+    q = float64_add(q, float64_half, s);
+    q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* return (double)s / 256.0; */
+    return float64_div(int64_to_float64(q_int, s), float64_256, s);
 }

 float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 one = int32_to_float32(1, s);
-    return float32_div(one, float32_sqrt(a, s), s);
+    float_status *s = &env->vfp.standard_fp_status;
+    int result_exp;
+    float64 f64;
+    uint32_t val;
+    uint64_t val64;
+
+    val = float32_val(a);
+
+    if (float32_is_any_nan(a)) {
+        if (float32_is_signaling_nan(a)) {
+            float_raise(float_flag_invalid, s);
+        }
+        return float32_default_nan;
+    } else if (float32_is_zero_or_denormal(a)) {
+        if (!float32_is_zero(a)) {
+            float_raise(float_flag_input_denormal, s);
+        }
+        float_raise(float_flag_divbyzero, s);
+        return float32_set_sign(float32_infinity, float32_is_neg(a));
+    } else if (float32_is_neg(a)) {
+        float_raise(float_flag_invalid, s);
+        return float32_default_nan;
+    } else if (float32_is_infinity(a)) {
+        return float32_zero;
+    }
+
+    /* Normalize to a double-precision value between 0.25 and 1.0,
+     * preserving the parity of the exponent.
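+     * (The parity matters because a square root halves the exponent.
+     * For the reciprocal path in recpe_f32 above, the analogous
+     * bookkeeping is: a = f * 2^(e-126) with f in [0.5, 1.0), so
+     * 1/a = (1/f) * 2^(126-e); since 1/f lies in (1.0, 2.0], the biased
+     * single-precision result exponent is (126 - e) + 127 = 253 - e,
+     * i.e. the "result_exp = 253 - a_exp" computed above.)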
*/ + if ((val & 0x800000) == 0) { + f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + | (0x3feULL << 52) + | ((uint64_t)(val & 0x7fffff) << 29)); + } else { + f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + | (0x3fdULL << 52) + | ((uint64_t)(val & 0x7fffff) << 29)); + } + + result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2; + + f64 = recip_sqrt_estimate(f64, env); + + val64 = float64_val(f64); + + val = ((val64 >> 63) & 0x80000000) + | ((result_exp & 0xff) << 23) + | ((val64 >> 29) & 0x7fffff); + return make_float32(val); } uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env) { - float_status *s = &env->vfp.fp_status; - float32 tmp; - tmp = int32_to_float32(a, s); - tmp = float32_scalbn(tmp, -32, s); - tmp = helper_recpe_f32(tmp, env); - tmp = float32_scalbn(tmp, 31, s); - return float32_to_int32(tmp, s); + float64 f64; + + if ((a & 0x80000000) == 0) { + return 0xffffffff; + } + + f64 = make_float64((0x3feULL << 52) + | ((int64_t)(a & 0x7fffffff) << 21)); + + f64 = recip_estimate (f64, env); + + return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env) { - float_status *s = &env->vfp.fp_status; - float32 tmp; - tmp = int32_to_float32(a, s); - tmp = float32_scalbn(tmp, -32, s); - tmp = helper_rsqrte_f32(tmp, env); - tmp = float32_scalbn(tmp, 31, s); - return float32_to_int32(tmp, s); + float64 f64; + + if ((a & 0xc0000000) == 0) { + return 0xffffffff; + } + + if (a & 0x80000000) { + f64 = make_float64((0x3feULL << 52) + | ((uint64_t)(a & 0x7fffffff) << 21)); + } else { /* bits 31-30 == '01' */ + f64 = make_float64((0x3fdULL << 52) + | ((uint64_t)(a & 0x3fffffff) << 22)); + } + + f64 = recip_sqrt_estimate(f64, env); + + return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } void HELPER(set_teecr)(CPUState *env, uint32_t val) diff --git a/target-arm/helpers.h b/target-arm/helper.h index bf210fe..850e3d0 100644 --- a/target-arm/helpers.h +++ b/target-arm/helper.h @@ -68,10 +68,6 @@ DEF_HELPER_2(get_cp, i32, env, i32) DEF_HELPER_2(get_r13_banked, i32, env, i32) DEF_HELPER_3(set_r13_banked, void, env, i32, i32) -DEF_HELPER_2(mark_exclusive, void, env, i32) -DEF_HELPER_2(test_exclusive, i32, env, i32) -DEF_HELPER_1(clrex, void, env) - DEF_HELPER_1(get_user_reg, i32, i32) DEF_HELPER_2(set_user_reg, void, i32, i32) @@ -100,36 +96,41 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) -DEF_HELPER_2(vfp_uitos, f32, f32, env) -DEF_HELPER_2(vfp_uitod, f64, f32, env) -DEF_HELPER_2(vfp_sitos, f32, f32, env) -DEF_HELPER_2(vfp_sitod, f64, f32, env) - -DEF_HELPER_2(vfp_touis, f32, f32, env) -DEF_HELPER_2(vfp_touid, f32, f64, env) -DEF_HELPER_2(vfp_touizs, f32, f32, env) -DEF_HELPER_2(vfp_touizd, f32, f64, env) -DEF_HELPER_2(vfp_tosis, f32, f32, env) -DEF_HELPER_2(vfp_tosid, f32, f64, env) -DEF_HELPER_2(vfp_tosizs, f32, f32, env) -DEF_HELPER_2(vfp_tosizd, f32, f64, env) - -DEF_HELPER_3(vfp_toshs, f32, f32, i32, env) -DEF_HELPER_3(vfp_tosls, f32, f32, i32, env) -DEF_HELPER_3(vfp_touhs, f32, f32, i32, env) -DEF_HELPER_3(vfp_touls, f32, f32, i32, env) -DEF_HELPER_3(vfp_toshd, f64, f64, i32, env) -DEF_HELPER_3(vfp_tosld, f64, f64, i32, env) -DEF_HELPER_3(vfp_touhd, f64, f64, i32, env) -DEF_HELPER_3(vfp_tould, f64, f64, i32, env) -DEF_HELPER_3(vfp_shtos, f32, f32, i32, env) -DEF_HELPER_3(vfp_sltos, f32, f32, i32, env) -DEF_HELPER_3(vfp_uhtos, f32, f32, i32, env) -DEF_HELPER_3(vfp_ultos, f32, f32, i32, env) -DEF_HELPER_3(vfp_shtod, f64, f64, i32, env) 
-DEF_HELPER_3(vfp_sltod, f64, f64, i32, env) -DEF_HELPER_3(vfp_uhtod, f64, f64, i32, env) -DEF_HELPER_3(vfp_ultod, f64, f64, i32, env) +DEF_HELPER_2(vfp_uitos, f32, i32, ptr) +DEF_HELPER_2(vfp_uitod, f64, i32, ptr) +DEF_HELPER_2(vfp_sitos, f32, i32, ptr) +DEF_HELPER_2(vfp_sitod, f64, i32, ptr) + +DEF_HELPER_2(vfp_touis, i32, f32, ptr) +DEF_HELPER_2(vfp_touid, i32, f64, ptr) +DEF_HELPER_2(vfp_touizs, i32, f32, ptr) +DEF_HELPER_2(vfp_touizd, i32, f64, ptr) +DEF_HELPER_2(vfp_tosis, i32, f32, ptr) +DEF_HELPER_2(vfp_tosid, i32, f64, ptr) +DEF_HELPER_2(vfp_tosizs, i32, f32, ptr) +DEF_HELPER_2(vfp_tosizd, i32, f64, ptr) + +DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) + +DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env) +DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env) +DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env) +DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env) DEF_HELPER_3(recps_f32, f32, f32, f32, env) DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) @@ -138,10 +139,6 @@ DEF_HELPER_2(rsqrte_f32, f32, f32, env) DEF_HELPER_2(recpe_u32, i32, i32, env) DEF_HELPER_2(rsqrte_u32, i32, i32, env) DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32) -DEF_HELPER_2(neon_add_saturate_u64, i64, i64, i64) -DEF_HELPER_2(neon_add_saturate_s64, i64, i64, i64) -DEF_HELPER_2(neon_sub_saturate_u64, i64, i64, i64) -DEF_HELPER_2(neon_sub_saturate_s64, i64, i64, i64) DEF_HELPER_2(add_cc, i32, i32, i32) DEF_HELPER_2(adc_cc, i32, i32, i32) @@ -151,21 +148,28 @@ DEF_HELPER_2(sbc_cc, i32, i32, i32) DEF_HELPER_2(shl, i32, i32, i32) DEF_HELPER_2(shr, i32, i32, i32) DEF_HELPER_2(sar, i32, i32, i32) -DEF_HELPER_2(ror, i32, i32, i32) DEF_HELPER_2(shl_cc, i32, i32, i32) DEF_HELPER_2(shr_cc, i32, i32, i32) DEF_HELPER_2(sar_cc, i32, i32, i32) DEF_HELPER_2(ror_cc, i32, i32, i32) /* neon_helper.c */ -DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) +DEF_HELPER_2(neon_qadd_u8, i32, i32, i32) +DEF_HELPER_2(neon_qadd_s8, i32, i32, i32) +DEF_HELPER_2(neon_qadd_u16, i32, i32, i32) +DEF_HELPER_2(neon_qadd_s16, i32, i32, i32) +DEF_HELPER_2(neon_qadd_u32, i32, i32, i32) +DEF_HELPER_2(neon_qadd_s32, i32, i32, i32) +DEF_HELPER_2(neon_qsub_u8, i32, i32, i32) +DEF_HELPER_2(neon_qsub_s8, i32, i32, i32) +DEF_HELPER_2(neon_qsub_u16, i32, i32, i32) +DEF_HELPER_2(neon_qsub_s16, i32, i32, i32) +DEF_HELPER_2(neon_qsub_u32, i32, i32, i32) +DEF_HELPER_2(neon_qsub_s32, i32, i32, i32) +DEF_HELPER_2(neon_qadd_u64, i64, i64, i64) +DEF_HELPER_2(neon_qadd_s64, i64, i64, i64) +DEF_HELPER_2(neon_qsub_u64, i64, i64, i64) +DEF_HELPER_2(neon_qsub_s64, i64, i64, i64) 
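Illustrative note (not part of the patch): each DEF_HELPER_n line both declares the C helper and registers its TCG signature. After the retyping above, a line such as DEF_HELPER_2(vfp_sitos, f32, i32, ptr) should correspond, modulo the exact HELPER() name decoration, to roughly:

float32 helper_vfp_sitos(uint32_t x, void *fpstp);

Passing the float_status as an explicit pointer is what allows translate.c to pick fp_status or standard_fp_status per instruction (VFP versus NEON) instead of hard-coding the choice inside each helper.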
DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) @@ -243,22 +247,26 @@ DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) -DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) +DEF_HELPER_2(neon_qshl_u8, i32, i32, i32) +DEF_HELPER_2(neon_qshl_s8, i32, i32, i32) +DEF_HELPER_2(neon_qshl_u16, i32, i32, i32) +DEF_HELPER_2(neon_qshl_s16, i32, i32, i32) +DEF_HELPER_2(neon_qshl_u32, i32, i32, i32) +DEF_HELPER_2(neon_qshl_s32, i32, i32, i32) +DEF_HELPER_2(neon_qshl_u64, i64, i64, i64) +DEF_HELPER_2(neon_qshl_s64, i64, i64, i64) +DEF_HELPER_2(neon_qshlu_s8, i32, i32, i32); +DEF_HELPER_2(neon_qshlu_s16, i32, i32, i32); +DEF_HELPER_2(neon_qshlu_s32, i32, i32, i32); +DEF_HELPER_2(neon_qshlu_s64, i64, i64, i64); +DEF_HELPER_2(neon_qrshl_u8, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_s8, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_u16, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_s16, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_u32, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_s32, i32, i32, i32) +DEF_HELPER_2(neon_qrshl_u64, i64, i64, i64) +DEF_HELPER_2(neon_qrshl_s64, i64, i64, i64) DEF_HELPER_2(neon_add_u8, i32, i32, i32) DEF_HELPER_2(neon_add_u16, i32, i32, i32) @@ -269,6 +277,7 @@ DEF_HELPER_2(neon_sub_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_u8, i32, i32, i32) DEF_HELPER_2(neon_mul_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_p8, i32, i32, i32) +DEF_HELPER_2(neon_mull_p8, i64, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) @@ -286,19 +295,22 @@ DEF_HELPER_1(neon_cls_s16, i32, i32) DEF_HELPER_1(neon_cls_s32, i32, i32) DEF_HELPER_1(neon_cnt_u8, i32, i32) -DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) +DEF_HELPER_2(neon_qdmulh_s16, i32, i32, i32) +DEF_HELPER_2(neon_qrdmulh_s16, i32, i32, i32) +DEF_HELPER_2(neon_qdmulh_s32, i32, i32, i32) +DEF_HELPER_2(neon_qrdmulh_s32, i32, i32, i32) DEF_HELPER_1(neon_narrow_u8, i32, i64) DEF_HELPER_1(neon_narrow_u16, i32, i64) -DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64) -DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64) +DEF_HELPER_1(neon_unarrow_sat8, i32, i64) +DEF_HELPER_1(neon_narrow_sat_u8, i32, i64) +DEF_HELPER_1(neon_narrow_sat_s8, i32, i64) +DEF_HELPER_1(neon_unarrow_sat16, i32, i64) +DEF_HELPER_1(neon_narrow_sat_u16, i32, i64) +DEF_HELPER_1(neon_narrow_sat_s16, i32, i64) +DEF_HELPER_1(neon_unarrow_sat32, 
i32, i64) +DEF_HELPER_1(neon_narrow_sat_u32, i32, i64) +DEF_HELPER_1(neon_narrow_sat_s32, i32, i64) DEF_HELPER_1(neon_narrow_high_u8, i32, i64) DEF_HELPER_1(neon_narrow_high_u16, i32, i64) DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) @@ -314,8 +326,8 @@ DEF_HELPER_2(neon_paddl_u16, i64, i64, i64) DEF_HELPER_2(neon_paddl_u32, i64, i64, i64) DEF_HELPER_2(neon_subl_u16, i64, i64, i64) DEF_HELPER_2(neon_subl_u32, i64, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) -DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) +DEF_HELPER_2(neon_addl_saturate_s32, i64, i64, i64) +DEF_HELPER_2(neon_addl_saturate_s64, i64, i64, i64) DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) @@ -331,18 +343,12 @@ DEF_HELPER_1(neon_negl_u16, i64, i64) DEF_HELPER_1(neon_negl_u32, i64, i64) DEF_HELPER_1(neon_negl_u64, i64, i64) -DEF_HELPER_2(neon_qabs_s8, i32, env, i32) -DEF_HELPER_2(neon_qabs_s16, i32, env, i32) -DEF_HELPER_2(neon_qabs_s32, i32, env, i32) -DEF_HELPER_2(neon_qneg_s8, i32, env, i32) -DEF_HELPER_2(neon_qneg_s16, i32, env, i32) -DEF_HELPER_2(neon_qneg_s32, i32, env, i32) - -DEF_HELPER_0(neon_trn_u8, void) -DEF_HELPER_0(neon_trn_u16, void) -DEF_HELPER_0(neon_unzip_u8, void) -DEF_HELPER_0(neon_zip_u8, void) -DEF_HELPER_0(neon_zip_u16, void) +DEF_HELPER_1(neon_qabs_s8, i32, i32) +DEF_HELPER_1(neon_qabs_s16, i32, i32) +DEF_HELPER_1(neon_qabs_s32, i32, i32) +DEF_HELPER_1(neon_qneg_s8, i32, i32) +DEF_HELPER_1(neon_qneg_s16, i32, i32) +DEF_HELPER_1(neon_qneg_s32, i32, i32) DEF_HELPER_2(neon_min_f32, i32, i32, i32) DEF_HELPER_2(neon_max_f32, i32, i32, i32) @@ -369,47 +375,47 @@ DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) -#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ -DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ -DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ - -DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) -DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) - -DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) -DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) - -DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) -DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) - -DEF_IWMMXT_HELPER_SIZE_ENV(mins) -DEF_IWMMXT_HELPER_SIZE_ENV(minu) -DEF_IWMMXT_HELPER_SIZE_ENV(maxs) -DEF_IWMMXT_HELPER_SIZE_ENV(maxu) - -DEF_IWMMXT_HELPER_SIZE_ENV(subn) -DEF_IWMMXT_HELPER_SIZE_ENV(addn) -DEF_IWMMXT_HELPER_SIZE_ENV(subu) -DEF_IWMMXT_HELPER_SIZE_ENV(addu) -DEF_IWMMXT_HELPER_SIZE_ENV(subs) -DEF_IWMMXT_HELPER_SIZE_ENV(adds) - -DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) +#define DEF_IWMMXT_HELPER_SIZE(name) \ +DEF_HELPER_2(iwmmxt_##name##b, i64, i64, i64) \ +DEF_HELPER_2(iwmmxt_##name##w, i64, i64, i64) \ +DEF_HELPER_2(iwmmxt_##name##l, i64, i64, i64) \ + +DEF_IWMMXT_HELPER_SIZE(unpackl) 
+DEF_IWMMXT_HELPER_SIZE(unpackh) + +DEF_HELPER_1(iwmmxt_unpacklub, i64, i64) +DEF_HELPER_1(iwmmxt_unpackluw, i64, i64) +DEF_HELPER_1(iwmmxt_unpacklul, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhub, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhuw, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhul, i64, i64) +DEF_HELPER_1(iwmmxt_unpacklsb, i64, i64) +DEF_HELPER_1(iwmmxt_unpacklsw, i64, i64) +DEF_HELPER_1(iwmmxt_unpacklsl, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhsb, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhsw, i64, i64) +DEF_HELPER_1(iwmmxt_unpackhsl, i64, i64) + +DEF_IWMMXT_HELPER_SIZE(cmpeq) +DEF_IWMMXT_HELPER_SIZE(cmpgtu) +DEF_IWMMXT_HELPER_SIZE(cmpgts) + +DEF_IWMMXT_HELPER_SIZE(mins) +DEF_IWMMXT_HELPER_SIZE(minu) +DEF_IWMMXT_HELPER_SIZE(maxs) +DEF_IWMMXT_HELPER_SIZE(maxu) + +DEF_IWMMXT_HELPER_SIZE(subn) +DEF_IWMMXT_HELPER_SIZE(addn) +DEF_IWMMXT_HELPER_SIZE(subu) +DEF_IWMMXT_HELPER_SIZE(addu) +DEF_IWMMXT_HELPER_SIZE(subs) +DEF_IWMMXT_HELPER_SIZE(adds) + +DEF_HELPER_2(iwmmxt_avgb0, i64, i64, i64) +DEF_HELPER_2(iwmmxt_avgb1, i64, i64, i64) +DEF_HELPER_2(iwmmxt_avgw0, i64, i64, i64) +DEF_HELPER_2(iwmmxt_avgw1, i64, i64, i64) DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64) @@ -428,26 +434,26 @@ DEF_HELPER_1(iwmmxt_msbb, i32, i64) DEF_HELPER_1(iwmmxt_msbw, i32, i64) DEF_HELPER_1(iwmmxt_msbl, i32, i64) -DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) -DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) - -DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) -DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) +DEF_HELPER_2(iwmmxt_srlw, i64, i64, i32) +DEF_HELPER_2(iwmmxt_srll, i64, i64, i32) +DEF_HELPER_2(iwmmxt_srlq, i64, i64, i32) +DEF_HELPER_2(iwmmxt_sllw, i64, i64, i32) +DEF_HELPER_2(iwmmxt_slll, i64, i64, i32) +DEF_HELPER_2(iwmmxt_sllq, i64, i64, i32) +DEF_HELPER_2(iwmmxt_sraw, i64, i64, i32) +DEF_HELPER_2(iwmmxt_sral, i64, i64, i32) +DEF_HELPER_2(iwmmxt_sraq, i64, i64, i32) +DEF_HELPER_2(iwmmxt_rorw, i64, i64, i32) +DEF_HELPER_2(iwmmxt_rorl, i64, i64, i32) +DEF_HELPER_2(iwmmxt_rorq, i64, i64, i32) +DEF_HELPER_2(iwmmxt_shufh, i64, i64, i32) + +DEF_HELPER_2(iwmmxt_packuw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_packul, i64, i64, i64) +DEF_HELPER_2(iwmmxt_packuq, i64, i64, i64) +DEF_HELPER_2(iwmmxt_packsw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_packsl, i64, i64, i64) +DEF_HELPER_2(iwmmxt_packsq, i64, i64, i64) DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) @@ -455,4 +461,17 @@ DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) DEF_HELPER_2(set_teecr, void, env, i32) +DEF_HELPER_2(neon_unzip8, void, i32, i32) +DEF_HELPER_2(neon_unzip16, void, i32, i32) +DEF_HELPER_2(neon_qunzip8, void, i32, i32) +DEF_HELPER_2(neon_qunzip16, void, i32, i32) +DEF_HELPER_2(neon_qunzip32, void, i32, i32) +DEF_HELPER_2(neon_zip8, void, i32, i32) +DEF_HELPER_2(neon_zip16, void, i32, i32) +DEF_HELPER_2(neon_qzip8, void, 
i32, i32) +DEF_HELPER_2(neon_qzip16, void, i32, i32) +DEF_HELPER_2(neon_qzip32, void, i32, i32) +DEF_HELPER_1(neon_vldst_all, void, i32) + #include "helper-android.h" +#include "def-helper.h" diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c index 2e4193e..ebe6eb9 100644 --- a/target-arm/iwmmxt_helper.c +++ b/target-arm/iwmmxt_helper.c @@ -16,16 +16,15 @@ * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include "cpu.h" -#include "exec-all.h" -#include "helpers.h" +#include "exec.h" +#include "helper.h" /* iwMMXt macros extracted from GNU gdb. */ @@ -163,8 +162,7 @@ uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b) SIMD64_SET(NBIT64(x), SIMD_NBIT) | \ SIMD64_SET(ZBIT64(x), SIMD_ZBIT) #define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3) \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(uint64_t a, uint64_t b) \ { \ a = \ (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) | \ @@ -178,8 +176,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUState *env, \ NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ return a; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(uint64_t a, uint64_t b) \ { \ a = \ (((a >> SH0) & 0xffff) << 0) | \ @@ -191,8 +188,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUState *env, \ NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3); \ return a; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(uint64_t a, uint64_t b) \ { \ a = \ (((a >> SH0) & 0xffffffff) << 0) | \ @@ -201,8 +197,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUState *env, \ NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ return a; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(uint64_t x) \ { \ x = \ (((x >> SH0) & 0xff) << 0) | \ @@ -214,8 +209,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUState *env, \ NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ return x; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(uint64_t x) \ { \ x = \ (((x >> SH0) & 0xffff) << 0) | \ @@ -224,15 +218,13 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUState *env, \ NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ return x; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(uint64_t x) \ { \ x = (((x >> SH0) & 0xffffffff) << 0); \ env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ return x; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(uint64_t x) \ { \ x = \ ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) | \ @@ -244,8 +236,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUState *env, \ NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ return x; \ } \ -uint64_t 
HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(uint64_t x) \ { \ x = \ ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) | \ @@ -254,8 +245,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUState *env, \ NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ return x; \ } \ -uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUState *env, \ - uint64_t x) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(uint64_t x) \ { \ x = EXTEND32((x >> SH0) & 0xffffffff); \ env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ @@ -265,8 +255,7 @@ IWMMXT_OP_UNPACK(l, 0, 8, 16, 24) IWMMXT_OP_UNPACK(h, 32, 40, 48, 56) #define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O) \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(uint64_t a, uint64_t b) \ { \ a = \ CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) | \ @@ -280,8 +269,7 @@ uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUState *env, \ NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ return a; \ } \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(uint64_t a, uint64_t b) \ { \ a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) | \ CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff); \ @@ -290,8 +278,7 @@ uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUState *env, \ NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); \ return a; \ } \ -uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUState *env, \ - uint64_t a, uint64_t b) \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(uint64_t a, uint64_t b) \ { \ a = CMP(0, Tl, O, 0xffffffff) | \ CMP(32, Tl, O, 0xffffffff); \ @@ -330,7 +317,7 @@ IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +) #define AVGB(SHR) ((( \ ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR) #define IWMMXT_OP_AVGB(r) \ -uint64_t HELPER(iwmmxt_avgb##r)(CPUState *env, uint64_t a, uint64_t b) \ +uint64_t HELPER(iwmmxt_avgb##r)(uint64_t a, uint64_t b) \ { \ const int round = r; \ a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) | \ @@ -354,7 +341,7 @@ IWMMXT_OP_AVGB(1) #define AVGW(SHR) ((( \ ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR) #define IWMMXT_OP_AVGW(r) \ -uint64_t HELPER(iwmmxt_avgw##r)(CPUState *env, uint64_t a, uint64_t b) \ +uint64_t HELPER(iwmmxt_avgw##r)(uint64_t a, uint64_t b) \ { \ const int round = r; \ a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48); \ @@ -465,7 +452,7 @@ uint32_t HELPER(iwmmxt_msbl)(uint64_t x) } /* FIXME: Split wCASF setting into a separate op to avoid env use. 
*/ -uint64_t HELPER(iwmmxt_srlw)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_srlw)(uint64_t x, uint32_t n) { x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) | (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) | @@ -477,7 +464,7 @@ uint64_t HELPER(iwmmxt_srlw)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_srll)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_srll)(uint64_t x, uint32_t n) { x = ((x & (0xffffffffll << 0)) >> n) | ((x >> n) & (0xffffffffll << 32)); @@ -486,14 +473,14 @@ uint64_t HELPER(iwmmxt_srll)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_srlq)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_srlq)(uint64_t x, uint32_t n) { x >>= n; env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); return x; } -uint64_t HELPER(iwmmxt_sllw)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_sllw)(uint64_t x, uint32_t n) { x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) | (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) | @@ -505,7 +492,7 @@ uint64_t HELPER(iwmmxt_sllw)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_slll)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_slll)(uint64_t x, uint32_t n) { x = ((x << n) & (0xffffffffll << 0)) | ((x & (0xffffffffll << 32)) << n); @@ -514,14 +501,14 @@ uint64_t HELPER(iwmmxt_slll)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_sllq)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_sllq)(uint64_t x, uint32_t n) { x <<= n; env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); return x; } -uint64_t HELPER(iwmmxt_sraw)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_sraw)(uint64_t x, uint32_t n) { x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) | ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) | @@ -533,7 +520,7 @@ uint64_t HELPER(iwmmxt_sraw)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_sral)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_sral)(uint64_t x, uint32_t n) { x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) | (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32); @@ -542,14 +529,14 @@ uint64_t HELPER(iwmmxt_sral)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_sraq)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_sraq)(uint64_t x, uint32_t n) { x = (int64_t) x >> n; env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); return x; } -uint64_t HELPER(iwmmxt_rorw)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_rorw)(uint64_t x, uint32_t n) { x = ((((x & (0xffffll << 0)) >> n) | ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) | @@ -565,7 +552,7 @@ uint64_t HELPER(iwmmxt_rorw)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_rorl)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_rorl)(uint64_t x, uint32_t n) { x = ((x & (0xffffffffll << 0)) >> n) | ((x >> n) & (0xffffffffll << 32)) | @@ -576,14 +563,14 @@ uint64_t HELPER(iwmmxt_rorl)(CPUState *env, uint64_t x, uint32_t n) return x; } -uint64_t HELPER(iwmmxt_rorq)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_rorq)(uint64_t x, uint32_t n) { x = (x >> n) | (x << (64 - n)); env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); return x; } -uint64_t HELPER(iwmmxt_shufh)(CPUState *env, uint64_t x, uint32_t n) +uint64_t HELPER(iwmmxt_shufh)(uint64_t x, uint32_t n) { x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 
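Illustrative note (not part of the patch): aside from the wCASF flag maintenance, the shift helpers above all follow one pattern - shift each sub-word lane with masks applied so that bits cannot leak across lane boundaries. A two-lane toy version of the 16-bit logical right shift (name ours):

#include <stdint.h>

static uint32_t example_packed_srl16(uint32_t x, unsigned n) /* n < 16 */
{
    uint32_t lo = ((x & 0x0000ffffu) >> n) & 0x0000ffffu;
    uint32_t hi = ((x & 0xffff0000u) >> n) & 0xffff0000u;
    return lo | hi;
}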
0) | (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) | @@ -596,7 +583,7 @@ uint64_t HELPER(iwmmxt_shufh)(CPUState *env, uint64_t x, uint32_t n) } /* TODO: Unsigned-Saturation */ -uint64_t HELPER(iwmmxt_packuw)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packuw)(uint64_t a, uint64_t b) { a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | @@ -610,7 +597,7 @@ uint64_t HELPER(iwmmxt_packuw)(CPUState *env, uint64_t a, uint64_t b) return a; } -uint64_t HELPER(iwmmxt_packul)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packul)(uint64_t a, uint64_t b) { a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); @@ -620,7 +607,7 @@ uint64_t HELPER(iwmmxt_packul)(CPUState *env, uint64_t a, uint64_t b) return a; } -uint64_t HELPER(iwmmxt_packuq)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packuq)(uint64_t a, uint64_t b) { a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = @@ -629,7 +616,7 @@ uint64_t HELPER(iwmmxt_packuq)(CPUState *env, uint64_t a, uint64_t b) } /* TODO: Signed-Saturation */ -uint64_t HELPER(iwmmxt_packsw)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packsw)(uint64_t a, uint64_t b) { a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | @@ -643,7 +630,7 @@ uint64_t HELPER(iwmmxt_packsw)(CPUState *env, uint64_t a, uint64_t b) return a; } -uint64_t HELPER(iwmmxt_packsl)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packsl)(uint64_t a, uint64_t b) { a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); @@ -653,7 +640,7 @@ uint64_t HELPER(iwmmxt_packsl)(CPUState *env, uint64_t a, uint64_t b) return a; } -uint64_t HELPER(iwmmxt_packsq)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(iwmmxt_packsq)(uint64_t a, uint64_t b) { a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = diff --git a/target-arm/machine.c b/target-arm/machine.c index b1deacb..1726fd5 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -22,12 +22,18 @@ void cpu_save(QEMUFile *f, void *opaque) } qemu_put_be32(f, env->cp15.c0_cpuid); qemu_put_be32(f, env->cp15.c0_cachetype); + qemu_put_be32(f, env->cp15.c0_cssel); qemu_put_be32(f, env->cp15.c1_sys); qemu_put_be32(f, env->cp15.c1_coproc); qemu_put_be32(f, env->cp15.c1_xscaleauxcr); + qemu_put_be32(f, env->cp15.c1_secfg); + qemu_put_be32(f, env->cp15.c1_sedbg); + qemu_put_be32(f, env->cp15.c1_nseac); qemu_put_be32(f, env->cp15.c2_base0); qemu_put_be32(f, env->cp15.c2_base1); + qemu_put_be32(f, env->cp15.c2_control); qemu_put_be32(f, env->cp15.c2_mask); + qemu_put_be32(f, env->cp15.c2_base_mask); qemu_put_be32(f, env->cp15.c2_data); qemu_put_be32(f, env->cp15.c2_insn); qemu_put_be32(f, env->cp15.c3); @@ -38,8 +44,12 @@ void cpu_save(QEMUFile *f, void *opaque) } qemu_put_be32(f, env->cp15.c6_insn); qemu_put_be32(f, env->cp15.c6_data); + qemu_put_be32(f, env->cp15.c7_par); qemu_put_be32(f, env->cp15.c9_insn); qemu_put_be32(f, env->cp15.c9_data); + qemu_put_be32(f, env->cp15.c9_pmcr_data); + qemu_put_be32(f, env->cp15.c9_useren); + qemu_put_be32(f, env->cp15.c9_inten); qemu_put_be32(f, env->cp15.c13_fcse); qemu_put_be32(f, env->cp15.c13_context); qemu_put_be32(f, env->cp15.c13_tls1); @@ -47,6 +57,8 @@ void cpu_save(QEMUFile *f, void *opaque) 
qemu_put_be32(f, env->cp15.c13_tls3); qemu_put_be32(f, env->cp15.c15_cpar); + qemu_put_be32(f, env->cp14_dbgdidr); + qemu_put_be32(f, env->features); if (arm_feature(env, ARM_FEATURE_VFP)) { @@ -91,12 +103,18 @@ void cpu_save(QEMUFile *f, void *opaque) qemu_put_be32(f, env->v7m.current_sp); qemu_put_be32(f, env->v7m.exception); } + + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { + qemu_put_be32(f, env->teecr); + qemu_put_be32(f, env->teehbr); + } } int cpu_load(QEMUFile *f, void *opaque, int version_id) { CPUARMState *env = (CPUARMState *)opaque; int i; + uint32_t val; if (version_id != CPU_SAVE_VERSION) return -EINVAL; @@ -104,7 +122,10 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) for (i = 0; i < 16; i++) { env->regs[i] = qemu_get_be32(f); } - cpsr_write(env, qemu_get_be32(f), 0xffffffff); + val = qemu_get_be32(f); + /* Avoid mode switch when restoring CPSR. */ + env->uncached_cpsr = val & CPSR_M; + cpsr_write(env, val, 0xffffffff); env->spsr = qemu_get_be32(f); for (i = 0; i < 6; i++) { env->banked_spsr[i] = qemu_get_be32(f); @@ -117,12 +138,18 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) } env->cp15.c0_cpuid = qemu_get_be32(f); env->cp15.c0_cachetype = qemu_get_be32(f); + env->cp15.c0_cssel = qemu_get_be32(f); env->cp15.c1_sys = qemu_get_be32(f); env->cp15.c1_coproc = qemu_get_be32(f); env->cp15.c1_xscaleauxcr = qemu_get_be32(f); + env->cp15.c1_secfg = qemu_get_be32(f); + env->cp15.c1_sedbg = qemu_get_be32(f); + env->cp15.c1_nseac = qemu_get_be32(f); env->cp15.c2_base0 = qemu_get_be32(f); env->cp15.c2_base1 = qemu_get_be32(f); + env->cp15.c2_control = qemu_get_be32(f); env->cp15.c2_mask = qemu_get_be32(f); + env->cp15.c2_base_mask = qemu_get_be32(f); env->cp15.c2_data = qemu_get_be32(f); env->cp15.c2_insn = qemu_get_be32(f); env->cp15.c3 = qemu_get_be32(f); @@ -133,8 +160,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) } env->cp15.c6_insn = qemu_get_be32(f); env->cp15.c6_data = qemu_get_be32(f); + env->cp15.c7_par = qemu_get_be32(f); env->cp15.c9_insn = qemu_get_be32(f); env->cp15.c9_data = qemu_get_be32(f); + env->cp15.c9_pmcr_data = qemu_get_be32(f); + env->cp15.c9_useren = qemu_get_be32(f); + env->cp15.c9_inten = qemu_get_be32(f); env->cp15.c13_fcse = qemu_get_be32(f); env->cp15.c13_context = qemu_get_be32(f); env->cp15.c13_tls1 = qemu_get_be32(f); @@ -142,6 +173,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) env->cp15.c13_tls3 = qemu_get_be32(f); env->cp15.c15_cpar = qemu_get_be32(f); + env->cp14_dbgdidr = qemu_get_be32(f); + env->features = qemu_get_be32(f); if (arm_feature(env, ARM_FEATURE_VFP)) { @@ -187,5 +220,10 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) env->v7m.exception = qemu_get_be32(f); } + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { + env->teecr = qemu_get_be32(f); + env->teehbr = qemu_get_be32(f); + } + return 0; } diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index f32ecd6..9165519 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -10,39 +10,15 @@ #include <stdio.h> #include "cpu.h" -#include "exec-all.h" -#include "helpers.h" +#include "exec.h" +#include "helper.h" #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) #define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q -static float_status neon_float_status; -#define NFS &neon_float_status - -/* Helper routines to perform bitwise copies between float and int. 
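A note on the cpu_load() change above (ours, not part of the patch): cpsr_write() re-banks r13/r14/SPSR whenever the mode field changes, but at this point the banked registers have already been restored verbatim from the snapshot. Forcing the saved mode into uncached_cpsr first means the subsequent cpsr_write() sees no mode switch and does no spurious re-banking:

/* restating the patch lines, with comments (ours) */
env->uncached_cpsr = val & CPSR_M;   /* adopt saved mode without banking */
cpsr_write(env, val, 0xffffffff);    /* now restores flags etc. safely   */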
*/ -static inline float32 vfp_itos(uint32_t i) -{ - union { - uint32_t i; - float32 s; - } v; - - v.i = i; - return v.s; -} - -static inline uint32_t vfp_stoi(float32 s) -{ - union { - uint32_t i; - float32 s; - } v; - - v.s = s; - return v.i; -} +#define NFS (&env->vfp.standard_fp_status) #define NEON_TYPE1(name, type) \ typedef struct \ @@ -139,10 +115,6 @@ NEON_TYPE1(u32, uint32_t) uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ NEON_VOP_BODY(vtype, n) -#define NEON_VOP_ENV(name, vtype, n) \ -uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \ -NEON_VOP_BODY(vtype, n) - /* Pairwise operations. */ /* For 32-bit elements each segment only contains a single element, so the elementwise and pairwise operations are the same. */ @@ -191,13 +163,35 @@ uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ dest = tmp; \ }} while(0) #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qadd_u8, neon_u8, 4) +NEON_VOP(qadd_u8, neon_u8, 4) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qadd_u16, neon_u16, 2) +NEON_VOP(qadd_u16, neon_u16, 2) #undef NEON_FN #undef NEON_USAT +uint32_t HELPER(neon_qadd_u32)(uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + SET_QC(); + res = ~0; + } + return res; +} + +uint64_t HELPER(neon_qadd_u64)(uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (res < src1) { + SET_QC(); + res = ~(uint64_t)0; + } + return res; +} + #define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -211,13 +205,35 @@ NEON_VOP_ENV(qadd_u16, neon_u16, 2) dest = tmp; \ } while(0) #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qadd_s8, neon_s8, 4) +NEON_VOP(qadd_s8, neon_s8, 4) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qadd_s16, neon_s16, 2) +NEON_VOP(qadd_s16, neon_s16, 2) #undef NEON_FN #undef NEON_SSAT +uint32_t HELPER(neon_qadd_s32)(uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qadd_s64)(uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + #define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -227,13 +243,36 @@ NEON_VOP_ENV(qadd_s16, neon_s16, 2) dest = tmp; \ }} while(0) #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qsub_u8, neon_u8, 4) +NEON_VOP(qsub_u8, neon_u8, 4) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qsub_u16, neon_u16, 2) +NEON_VOP(qsub_u16, neon_u16, 2) #undef NEON_FN #undef NEON_USAT +uint32_t HELPER(neon_qsub_u32)(uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + SET_QC(); + res = 0; + } + return res; +} + +uint64_t HELPER(neon_qsub_u64)(uint64_t src1, uint64_t src2) +{ + uint64_t res; + + if (src1 < src2) { + SET_QC(); + res = 0; + } else { + res = src1 - src2; + } + return res; +} + #define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -247,13 +286,35 @@ 
NEON_VOP_ENV(qsub_u16, neon_u16, 2) dest = tmp; \ } while(0) #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qsub_s8, neon_s8, 4) +NEON_VOP(qsub_s8, neon_s8, 4) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qsub_s16, neon_s16, 2) +NEON_VOP(qsub_s16, neon_s16, 2) #undef NEON_FN #undef NEON_SSAT +uint32_t HELPER(neon_qsub_s32)(uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qsub_s64)(uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 - src2; + if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 NEON_VOP(hadd_s8, neon_s8, 4) NEON_VOP(hadd_u8, neon_u8, 4) @@ -392,7 +453,8 @@ NEON_VOP(abd_u32, neon_u32, 1) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8 || \ + tmp <= -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ @@ -420,9 +482,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ dest = 0; \ - } else if (tmp <= -sizeof(src1) * 8) { \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ @@ -453,14 +515,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8) { \ + if ((tmp >= (ssize_t)sizeof(src1) * 8) \ + || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ dest = 0; \ - } else if (tmp < -sizeof(src1) * 8) { \ - dest = src1 >> (sizeof(src1) * 8 - 1); \ - } else if (tmp == -sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ - dest++; \ - dest >>= 1; \ } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ @@ -468,23 +525,45 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) }} while (0) NEON_VOP(rshl_s8, neon_s8, 4) NEON_VOP(rshl_s16, neon_s16, 2) -NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator. */ +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if ((shift >= 32) || (shift <= -32)) { + dest = 0; + } else if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64 bits inputs values is more + * tricky than with 32 bits values. 
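Illustrative note (not part of the patch): the new scalar saturating helpers above use the standard two's-complement overflow tests - a signed addition overflows iff both operands have the same sign and the result's sign differs, and the saturated value is then INT_MAX or INT_MIN according to the first operand. A standalone 32-bit version (name ours), with the sticky Q flag modelled as an out-parameter:

#include <stdint.h>

static int32_t example_qadd_s32(int32_t a, int32_t b, int *qc)
{
    uint32_t res = (uint32_t)a + (uint32_t)b;

    if (((res ^ (uint32_t)a) & 0x80000000u) &&
        !(((uint32_t)a ^ (uint32_t)b) & 0x80000000u)) {
        *qc = 1;                                   /* saturation occurred */
        res = (a >= 0) ? 0x7fffffffu : 0x80000000u;
    }
    return (int32_t)res;
}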
*/ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; int64_t val = valop; - if (shift >= 64) { + if ((shift >= 64) || (shift <= -64)) { val = 0; - } else if (shift < -64) { - val >>= 63; - } else if (shift == -63) { - val >>= 63; - val++; - val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x4000000000000000LL; + } else { + val++; + val >>= 1; + } } else { val <<= shift; } @@ -494,10 +573,11 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8 || \ + tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ - } else if (tmp == -sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> (-tmp - 1); \ } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ @@ -505,20 +585,48 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) }} while (0) NEON_VOP(rshl_u8, neon_u8, 4) NEON_VOP(rshl_u16, neon_u16, 2) -NEON_VOP(rshl_u32, neon_u32, 1) #undef NEON_FN +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator. */ +uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) +{ + uint32_t dest; + int8_t shift = (int8_t)shiftop; + if (shift >= 32 || shift < -32) { + dest = 0; + } else if (shift == -32) { + dest = val >> 31; + } else if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64 bits inputs values is more + * tricky than with 32 bits values. */ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < 64) { + if (shift >= 64 || shift < -64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. 
*/ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { val <<= shift; } @@ -528,14 +636,14 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ if (src1) { \ SET_QC(); \ dest = ~0; \ } else { \ dest = 0; \ } \ - } else if (tmp <= -sizeof(src1) * 8) { \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ @@ -546,20 +654,18 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) dest = ~0; \ } \ }} while (0) -NEON_VOP_ENV(qshl_u8, neon_u8, 4) -NEON_VOP_ENV(qshl_u16, neon_u16, 2) -NEON_VOP_ENV(qshl_u32, neon_u32, 1) +NEON_VOP(qshl_u8, neon_u8, 4) +NEON_VOP(qshl_u16, neon_u16, 2) +NEON_VOP(qshl_u32, neon_u32, 1) #undef NEON_FN -uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) +uint64_t HELPER(neon_qshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; if (shift >= 64) { if (val) { val = ~(uint64_t)0; SET_QC(); - } else { - val = 0; } } else if (shift <= -64) { val = 0; @@ -579,11 +685,17 @@ uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp >= sizeof(src1) * 8) { \ - if (src1) \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ SET_QC(); \ - dest = src1 >> 31; \ - } else if (tmp <= -sizeof(src1) * 8) { \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } else { \ + dest = src1; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> 31; \ } else if (tmp < 0) { \ dest = src1 >> -tmp; \ @@ -591,24 +703,27 @@ uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ - dest = src2 >> 31; \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ } \ }} while (0) -NEON_VOP_ENV(qshl_s8, neon_s8, 4) -NEON_VOP_ENV(qshl_s16, neon_s16, 2) -NEON_VOP_ENV(qshl_s32, neon_s32, 1) +NEON_VOP(qshl_s8, neon_s8, 4) +NEON_VOP(qshl_s16, neon_s16, 2) +NEON_VOP(qshl_s32, neon_s32, 1) #undef NEON_FN -uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) +uint64_t HELPER(neon_qshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; int64_t val = valop; if (shift >= 64) { if (val) { SET_QC(); - val = (val >> 63) & ~SIGNBIT64; + val = (val >> 63) ^ ~SIGNBIT64; } - } else if (shift <= 64) { + } else if (shift <= -64) { val >>= 63; } else if (shift < 0) { val >>= -shift; @@ -623,12 +738,70 @@ uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) return val; } +#define NEON_FN(dest, src1, src2) do { \ + if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ + SET_QC(); \ + dest = 0; \ + } else { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = 0; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = ~0; \ + } \ + } \ + }} while (0) +NEON_VOP(qshlu_s8, neon_u8, 4) +NEON_VOP(qshlu_s16, neon_u16, 2) +#undef NEON_FN + +uint32_t HELPER(neon_qshlu_s32)(uint32_t valop, uint32_t shiftop) +{ + if 
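/* [Illustrative note, not part of the patch: the rounded-shift helpers
 * above avoid computing val + (1 << (-1 - shift)) directly because the
 * rounding constant can overflow the 64-bit type.  Instead they shift
 * down to one bit short of the target, add the rounding bit there, and
 * finish with a single-bit shift.  An equivalent overflow-free
 * formulation (name ours), for 1 <= n <= 63:
 *
 *     static uint64_t example_round_shift_right(uint64_t val, int n)
 *     {
 *         uint64_t partial = val >> (n - 1);      // keep one guard bit
 *         return (partial >> 1) + (partial & 1);  // round half up
 *     }
 *
 * (partial >> 1) + (partial & 1) equals (partial + 1) >> 1 but cannot
 * wrap even when partial == UINT64_MAX, which is exactly the special
 * case the helpers above handle explicitly.]
 */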
((int32_t)valop < 0) { + SET_QC(); + return 0; + } + return helper_neon_qshl_u32(valop, shiftop); +} + +uint64_t HELPER(neon_qshlu_s64)(uint64_t valop, uint64_t shiftop) +{ + if ((int64_t)valop < 0) { + SET_QC(); + return 0; + } + return helper_neon_qshl_u64(valop, shiftop); +} /* FIXME: This is wrong. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp < 0) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = 0; \ + } \ + } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> (sizeof(src1) * 8 - 1); \ + } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ @@ -637,16 +810,65 @@ uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) dest = ~0; \ } \ }} while (0) -NEON_VOP_ENV(qrshl_u8, neon_u8, 4) -NEON_VOP_ENV(qrshl_u16, neon_u16, 2) -NEON_VOP_ENV(qrshl_u32, neon_u32, 1) +NEON_VOP(qrshl_u8, neon_u8, 4) +NEON_VOP(qrshl_u16, neon_u16, 2) #undef NEON_FN -uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. */ +uint32_t HELPER(neon_qrshl_u32)(uint32_t val, uint32_t shiftop) { + uint32_t dest; int8_t shift = (int8_t)shiftop; - if (shift < 0) { - val = (val + (1 << (-1 - shift))) >> -shift; + if (shift >= 32) { + if (val) { + SET_QC(); + dest = ~0; + } else { + dest = 0; + } + } else if (shift < -32) { + dest = 0; + } else if (shift == -32) { + dest = val >> 31; + } else if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = ~0; + } + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is trickier + * than with 32-bit values. */ +uint64_t HELPER(neon_qrshl_u64)(uint64_t val, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + if (shift >= 64) { + if (val) { + SET_QC(); + val = ~0; + } + } else if (shift < -64) { + val = 0; + } else if (shift == -64) { + val >>= 63; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. 
*/ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { \ uint64_t tmp = val; val <<= shift; @@ -661,33 +883,94 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ - if (tmp < 0) { \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = (1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } else { \ + dest = 0; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ } else { \ dest = src1 << tmp; \ if ((dest >> tmp) != src1) { \ SET_QC(); \ - dest = src1 >> 31; \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ } \ }} while (0) -NEON_VOP_ENV(qrshl_s8, neon_s8, 4) -NEON_VOP_ENV(qrshl_s16, neon_s16, 2) -NEON_VOP_ENV(qrshl_s32, neon_s32, 1) +NEON_VOP(qrshl_s8, neon_s8, 4) +NEON_VOP(qrshl_s16, neon_s16, 2) #undef NEON_FN -uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator. */ +uint32_t HELPER(neon_qrshl_s32)(uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if (shift >= 32) { + if (val) { + SET_QC(); + dest = (val >> 31) ^ ~SIGNBIT; + } else { + dest = 0; + } + } else if (shift <= -32) { + dest = 0; + } else if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = (val >> 31) ^ ~SIGNBIT; + } + } + return dest; +} + +/* Handling addition overflow with 64-bit input values is trickier + * than with 32-bit values. */ +uint64_t HELPER(neon_qrshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; int64_t val = valop; - if (shift < 0) { - val = (val + (1 << (-1 - shift))) >> -shift; + if (shift >= 64) { + if (val) { + SET_QC(); + val = (val >> 63) ^ ~SIGNBIT64; + } + } else if (shift <= -64) { + val = 0; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x4000000000000000ULL; + } else { + val++; + val >>= 1; + } } else { - int64_t tmp = val;; + int64_t tmp = val; val <<= shift; if ((val >> shift) != tmp) { SET_QC(); - val = tmp >> 31; + val = (tmp >> 63) ^ ~SIGNBIT64; } } return val; @@ -750,6 +1033,36 @@ uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) return result; } +uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) +{ + uint64_t result = 0; + uint64_t mask; + uint64_t op2ex = op2; + op2ex = (op2ex & 0xff) | + ((op2ex & 0xff00) << 8) | + ((op2ex & 0xff0000) << 16) | + ((op2ex & 0xff000000) << 24); + while (op1) { + mask = 0; + if (op1 & 1) { + mask |= 0xffff; + } + if (op1 & (1 << 8)) { + mask |= (0xffffU << 16); + } + if (op1 & (1 << 16)) { + mask |= (0xffffULL << 32); + } + if (op1 & (1 << 24)) { + mask |= (0xffffULL << 48); + } + result ^= op2ex & mask; + op1 = (op1 >> 1) & 0x7f7f7f7f; + op2ex <<= 1; + } + return result; +} + #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? 
-1 : 0 NEON_VOP(tst_u8, neon_u8, 4) NEON_VOP(tst_u16, neon_u16, 2) @@ -824,8 +1137,9 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x) if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ SET_QC(); \ tmp = (tmp >> 31) ^ ~SIGNBIT; \ + } else { \ + tmp <<= 1; \ } \ - tmp <<= 1; \ if (round) { \ int32_t old = tmp; \ tmp += 1 << 15; \ @@ -837,10 +1151,10 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x) dest = tmp >> 16; \ } while(0) #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) -NEON_VOP_ENV(qdmulh_s16, neon_s16, 2) +NEON_VOP(qdmulh_s16, neon_s16, 2) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) -NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) +NEON_VOP(qrdmulh_s16, neon_s16, 2) #undef NEON_FN #undef NEON_QDMULH16 @@ -863,10 +1177,10 @@ NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) dest = tmp >> 32; \ } while(0) #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) -NEON_VOP_ENV(qdmulh_s32, neon_s32, 1) +NEON_VOP(qdmulh_s32, neon_s32, 1) #undef NEON_FN #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) -NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) +NEON_VOP(qrdmulh_s32, neon_s32, 1) #undef NEON_FN #undef NEON_QDMULH32 @@ -907,7 +1221,34 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); } -uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_unarrow_sat8)(uint64_t x) +{ + uint16_t s; + uint8_t d; + uint32_t res = 0; +#define SAT8(n) \ + s = x >> n; \ + if (s & 0x8000) { \ + SET_QC(); \ + } else { \ + if (s > 0xff) { \ + d = 0xff; \ + SET_QC(); \ + } else { \ + d = s; \ + } \ + res |= (uint32_t)d << (n / 2); \ + } + + SAT8(0); + SAT8(16); + SAT8(32); + SAT8(48); +#undef SAT8 + return res; +} + +uint32_t HELPER(neon_narrow_sat_u8)(uint64_t x) { uint16_t s; uint8_t d; @@ -930,7 +1271,7 @@ uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) return res; } -uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_narrow_sat_s8)(uint64_t x) { int16_t s; uint8_t d; @@ -953,7 +1294,30 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) return res; } -uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_unarrow_sat16)(uint64_t x) +{ + uint32_t high; + uint32_t low; + low = x; + if (low & 0x80000000) { + low = 0; + SET_QC(); + } else if (low > 0xffff) { + low = 0xffff; + SET_QC(); + } + high = x >> 32; + if (high & 0x80000000) { + high = 0; + SET_QC(); + } else if (high > 0xffff) { + high = 0xffff; + SET_QC(); + } + return low | (high << 16); +} + +uint32_t HELPER(neon_narrow_sat_u16)(uint64_t x) { uint32_t high; uint32_t low; @@ -970,7 +1334,7 @@ uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) return low | (high << 16); } -uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_narrow_sat_s16)(uint64_t x) { int32_t low; int32_t high; @@ -987,7 +1351,20 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) return (uint16_t)low | (high << 16); } -uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_unarrow_sat32)(uint64_t x) +{ + if (x & 0x8000000000000000ull) { + SET_QC(); + return 0; + } + if (x > 0xffffffffu) { + SET_QC(); + return 0xffffffffu; + } + return x; +} + +uint32_t HELPER(neon_narrow_sat_u32)(uint64_t x) { if (x > 0xffffffffu) { SET_QC(); @@ -996,11 +1373,11 @@ uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) return x; } -uint32_t 
HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) +uint32_t HELPER(neon_narrow_sat_s32)(uint64_t x) { if ((int64_t)x != (int32_t)x) { SET_QC(); - return (x >> 63) ^ 0x7fffffff; + return ((int64_t)x >> 63) ^ 0x7fffffff; } return x; } @@ -1103,7 +1480,7 @@ uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) return (a - b) ^ mask; } -uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(neon_addl_saturate_s32)(uint64_t a, uint64_t b) { uint32_t x, y; uint32_t low, high; @@ -1125,7 +1502,7 @@ uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) return low | ((uint64_t)high << 32); } -uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) +uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b) { uint64_t result; @@ -1137,9 +1514,13 @@ uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) return result; } -#define DO_ABD(dest, x, y, type) do { \ - type tmp_x = x; \ - type tmp_y = y; \ +/* We have to do the arithmetic in a larger type than + * the input type, because for example with a signed 32 bit + * op the absolute difference can overflow a signed 32 bit value. + */ +#define DO_ABD(dest, x, y, intype, arithtype) do { \ + arithtype tmp_x = (intype)(x); \ + arithtype tmp_y = (intype)(y); \ dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \ } while(0) @@ -1147,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) { uint64_t tmp; uint64_t result; - DO_ABD(result, a, b, uint8_t); - DO_ABD(tmp, a >> 8, b >> 8, uint8_t); + DO_ABD(result, a, b, uint8_t, uint32_t); + DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t); result |= tmp << 16; - DO_ABD(tmp, a >> 16, b >> 16, uint8_t); + DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t); result |= tmp << 32; - DO_ABD(tmp, a >> 24, b >> 24, uint8_t); + DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t); result |= tmp << 48; return result; } @@ -1161,12 +1542,12 @@ uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) { uint64_t tmp; uint64_t result; - DO_ABD(result, a, b, int8_t); - DO_ABD(tmp, a >> 8, b >> 8, int8_t); + DO_ABD(result, a, b, int8_t, int32_t); + DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t); result |= tmp << 16; - DO_ABD(tmp, a >> 16, b >> 16, int8_t); + DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t); result |= tmp << 32; - DO_ABD(tmp, a >> 24, b >> 24, int8_t); + DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t); result |= tmp << 48; return result; } @@ -1175,8 +1556,8 @@ uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) { uint64_t tmp; uint64_t result; - DO_ABD(result, a, b, uint16_t); - DO_ABD(tmp, a >> 16, b >> 16, uint16_t); + DO_ABD(result, a, b, uint16_t, uint32_t); + DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t); return result | (tmp << 32); } @@ -1184,22 +1565,22 @@ uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) { uint64_t tmp; uint64_t result; - DO_ABD(result, a, b, int16_t); - DO_ABD(tmp, a >> 16, b >> 16, int16_t); + DO_ABD(result, a, b, int16_t, int32_t); + DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t); return result | (tmp << 32); } uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) { uint64_t result; - DO_ABD(result, a, b, uint32_t); + DO_ABD(result, a, b, uint32_t, uint64_t); return result; } uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) { uint64_t result; - DO_ABD(result, a, b, int32_t); + DO_ABD(result, a, b, int32_t, int64_t); return result; } #undef DO_ABD @@ -1275,7 +1656,6 @@ uint64_t HELPER(neon_negl_u16)(uint64_t x) return result; } 
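
The rewritten neon_qrshl_u64/neon_qrshl_s64 helpers above avoid needing a 65-bit intermediate by splitting a rounding right shift into a shift by (n - 1) followed by a guarded increment-and-halve. A minimal standalone sketch of the same trick (rshr_round_u64 is an illustrative name, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Rounding right shift by n (1..63) without a wider intermediate:
     * shift by n - 1 first, then add the final rounding bit and halve,
     * special-casing the one value where the increment would wrap.
     * This mirrors the shift < 0 path of neon_qrshl_u64 above. */
    static uint64_t rshr_round_u64(uint64_t val, int n)
    {
        val >>= n - 1;
        if (val == UINT64_MAX) {
            /* val + 1 would overflow; the true result is 2^64 >> 1 */
            return 0x8000000000000000ULL;
        }
        return (val + 1) >> 1;
    }

    int main(void)
    {
        /* round((2^64 - 1) / 2) == 0x8000000000000000 */
        printf("%016llx\n", (unsigned long long)rshr_round_u64(UINT64_MAX, 1));
        printf("%llu\n", (unsigned long long)rshr_round_u64(21, 2)); /* 5 */
        return 0;
    }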
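A second addition above, neon_mull_p8, widens four packed polynomial byte multiplies at once; the per-lane operation it vectorises is ordinary carry-less multiplication over GF(2). A hedged scalar sketch (pmull8 is an illustrative name):

    #include <stdint.h>
    #include <stdio.h>

    /* Carry-less multiply of two bytes: partial products are combined
     * with xor instead of add, so no carries propagate between bits.
     * neon_mull_p8 above performs four of these per call, using masks
     * to keep the packed lanes from interfering. */
    static uint16_t pmull8(uint8_t a, uint8_t b)
    {
        uint16_t r = 0;
        int i;
        for (i = 0; i < 8; i++) {
            if (a & (1 << i)) {
                r ^= (uint16_t)b << i;
            }
        }
        return r;
    }

    int main(void)
    {
        /* (x + 1) * (x + 1) = x^2 + 1 over GF(2): 0x3 * 0x3 = 0x5 */
        printf("%x\n", pmull8(0x3, 0x3));
        return 0;
    }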
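The comment added to DO_ABD above states why the arithmetic type must be wider than the input type; the boundary case is easy to demonstrate. A standalone illustration (abd_s32 is an illustrative name):

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    /* Absolute difference widened before subtracting, as the patched
     * DO_ABD does: the arithmetic type is wider than the input type. */
    static uint64_t abd_s32(int32_t a, int32_t b)
    {
        int64_t x = a, y = b;
        return (uint64_t)(x > y ? x - y : y - x);
    }

    int main(void)
    {
        /* INT32_MIN and INT32_MAX differ by 2^32 - 1, which is not
         * representable in any 32-bit type. */
        printf("%" PRIu64 "\n", abd_s32(INT32_MIN, INT32_MAX)); /* 4294967295 */
        return 0;
    }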
-#include <stdio.h> uint64_t HELPER(neon_negl_u32)(uint64_t x) { uint32_t low = -x; @@ -1298,7 +1678,7 @@ uint64_t HELPER(neon_negl_u64)(uint64_t x) } else if (x < 0) { \ x = -x; \ }} while (0) -uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qabs_s8)(uint32_t x) { neon_s8 vec; NEON_UNPACK(neon_s8, vec, x); @@ -1318,7 +1698,7 @@ uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) } else { \ x = -x; \ }} while (0) -uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qneg_s8)(uint32_t x) { neon_s8 vec; NEON_UNPACK(neon_s8, vec, x); @@ -1338,7 +1718,7 @@ uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) } else if (x < 0) { \ x = -x; \ }} while (0) -uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qabs_s16)(uint32_t x) { neon_s16 vec; NEON_UNPACK(neon_s16, vec, x); @@ -1356,7 +1736,7 @@ uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) } else { \ x = -x; \ }} while (0) -uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qneg_s16)(uint32_t x) { neon_s16 vec; NEON_UNPACK(neon_s16, vec, x); @@ -1367,7 +1747,7 @@ uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) } #undef DO_QNEG16 -uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qabs_s32)(uint32_t x) { if (x == SIGNBIT) { SET_QC(); @@ -1378,7 +1758,7 @@ uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) return x; } -uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) +uint32_t HELPER(neon_qneg_s32)(uint32_t x) { if (x == SIGNBIT) { SET_QC(); @@ -1392,66 +1772,251 @@ uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) /* NEON Float helpers. */ uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) { - float32 f0 = vfp_itos(a); - float32 f1 = vfp_itos(b); - return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; + return float32_val(float32_min(make_float32(a), make_float32(b), NFS)); } uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) { - float32 f0 = vfp_itos(a); - float32 f1 = vfp_itos(b); - return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; + return float32_val(float32_max(make_float32(a), make_float32(b), NFS)); } uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) { - float32 f0 = vfp_itos(a); - float32 f1 = vfp_itos(b); - return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) - ? float32_sub(f0, f1, NFS) - : float32_sub(f1, f0, NFS)); + float32 f0 = make_float32(a); + float32 f1 = make_float32(b); + return float32_val(float32_abs(float32_sub(f0, f1, NFS))); } uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) { - return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS)); + return float32_val(float32_add(make_float32(a), make_float32(b), NFS)); } uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) { - return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS)); + return float32_val(float32_sub(make_float32(a), make_float32(b), NFS)); } uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) { - return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS)); + return float32_val(float32_mul(make_float32(a), make_float32(b), NFS)); } -/* Floating point comparisons produce an integer result. */ -#define NEON_VOP_FCMP(name, cmp) \ -uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ -{ \ - if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ - return ~0; \ - else \ - return 0; \ +/* Floating point comparisons produce an integer result. + * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. 
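
The comparison helpers introduced below return Neon's element-wide 0/-1 masks rather than C booleans, and implement GE/GT by swapping the operands into softfloat's le/lt so that the signaling behaviour follows the comparison used. The mask convention itself is just a negation; a toy sketch with native floats (no softfloat dependency, cgt_mask is an illustrative name):

    #include <stdint.h>
    #include <stdio.h>

    /* Neon compare results are all-ones/all-zeros masks per element.
     * Negating a 0/1 comparison result yields exactly that. */
    static uint32_t cgt_mask(float a, float b)
    {
        return -(uint32_t)(a > b);   /* 0xffffffff if a > b, else 0 */
    }

    int main(void)
    {
        printf("%08x %08x\n", cgt_mask(2.0f, 1.0f), cgt_mask(1.0f, 2.0f));
        return 0;
    }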
+ * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires. + */ +uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b) +{ + return -float32_eq_quiet(make_float32(a), make_float32(b), NFS); +} + +uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b) +{ + return -float32_le(make_float32(b), make_float32(a), NFS); } -NEON_VOP_FCMP(ceq_f32, ==) -NEON_VOP_FCMP(cge_f32, >=) -NEON_VOP_FCMP(cgt_f32, >) +uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b) +{ + return -float32_lt(make_float32(b), make_float32(a), NFS); +} uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) { - float32 f0 = float32_abs(vfp_itos(a)); - float32 f1 = float32_abs(vfp_itos(b)); - return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; + float32 f0 = float32_abs(make_float32(a)); + float32 f1 = float32_abs(make_float32(b)); + return -float32_le(f1, f0, NFS); } uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) { - float32 f0 = float32_abs(vfp_itos(a)); - float32 f1 = float32_abs(vfp_itos(b)); - return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; + float32 f0 = float32_abs(make_float32(a)); + float32 f1 = float32_abs(make_float32(b)); + return -float32_lt(f1, f0, NFS); +} + +#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1)) + +void HELPER(neon_qunzip8)(uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) + | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) + | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) + | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); + uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) + | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) + | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) + | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); + uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) + | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) + | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) + | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); + uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) + | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) + | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) + | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qunzip16)(uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) + | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); + uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) + | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); + uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) + | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); + uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) + | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qunzip32)(uint32_t rd, uint32_t rm) +{ + 
uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); + uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); + uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); + uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_unzip8)(uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) + | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) + | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) + | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); + uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) + | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) + | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) + | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_unzip16)(uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) + | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); + uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) + | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_qzip8)(uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) + | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) + | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) + | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); + uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) + | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) + | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) + | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); + uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) + | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) + | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) + | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); + uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) + | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) + | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) + | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qzip16)(uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) + | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); + uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) + | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); + 
uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) + | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); + uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) + | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qzip32)(uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); + uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); + uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); + uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_zip8)(uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) + | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) + | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) + | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); + uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) + | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) + | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) + | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_zip16)(uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) + | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); + uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) + | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); } diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h index 29f77ba..c02c92a 100644 --- a/target-arm/op_addsub.h +++ b/target-arm/op_addsub.h @@ -73,8 +73,8 @@ uint32_t HELPER(glue(PFX,subaddx))(uint32_t a, uint32_t b GE_ARG) uint32_t res = 0; DECLARE_GE; - ADD16(a, b, 0); - SUB16(a >> 16, b >> 16, 1); + ADD16(a, b >> 16, 0); + SUB16(a >> 16, b, 1); SET_GE; return res; } @@ -84,8 +84,8 @@ uint32_t HELPER(glue(PFX,addsubx))(uint32_t a, uint32_t b GE_ARG) uint32_t res = 0; DECLARE_GE; - SUB16(a, b, 0); - ADD16(a >> 16, b >> 16, 1); + SUB16(a, b >> 16, 0); + ADD16(a >> 16, b, 1); SET_GE; return res; } diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index fc06536..ec6e5cc 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -14,11 +14,10 @@ * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "exec.h" -#include "helpers.h" +#include "helper.h" #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) @@ -29,20 +28,6 @@ void raise_exception(int tt) cpu_loop_exit(); } -/* thread support */ - -static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED; - -void cpu_lock(void) -{ - spin_lock(&global_cpu_lock); -} - -void cpu_unlock(void) -{ - spin_unlock(&global_cpu_lock); -} - uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, uint32_t rn, uint32_t maxindex) { @@ -67,12 +52,6 @@ uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, #if !defined(CONFIG_USER_ONLY) -//#define ALIGNED_ONLY 1 - -#if ALIGNED_ONLY == 1 -static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr); -#endif - #define MMUSUFFIX _mmu #define SHIFT 0 @@ -87,21 +66,6 @@ static void do_unaligned_access (target_ulong addr, int is_write, int is_user, v #define SHIFT 3 #include "softmmu_template.h" -#if ALIGNED_ONLY == 1 -static void do_unaligned_access (target_ulong addr, int is_write, int mmu_idx, void *retaddr) -{ - //printf("::UNALIGNED:: addr=%lx is_write=%d is_user=%d retaddr=%p\n", addr, is_write, is_user, retaddr); - if (mmu_idx) - { - env = cpu_single_env; - env->cp15.c5_data = 0x00000001; /* corresponds to an alignment fault */ - env->cp15.c6_data = addr; - env->exception_index = EXCP_DATA_ABORT; - cpu_loop_exit(); - } -} -#endif - /* try to fill the TLB and return an exception if error. If retaddr is NULL, it means that the function was called in C code (i.e. not from generated code or from helper.c) */ @@ -134,6 +98,47 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr) env = saved_env; } +void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val) +{ + int cp_num = (insn >> 8) & 0xf; + int cp_info = (insn >> 5) & 7; + int src = (insn >> 16) & 0xf; + int operand = insn & 0xf; + + if (env->cp[cp_num].cp_write) + env->cp[cp_num].cp_write(env->cp[cp_num].opaque, + cp_info, src, operand, val, GETPC()); + } + +uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn) +{ + int cp_num = (insn >> 8) & 0xf; + int cp_info = (insn >> 5) & 7; + int dest = (insn >> 16) & 0xf; + int operand = insn & 0xf; + + if (env->cp[cp_num].cp_read) + return env->cp[cp_num].cp_read(env->cp[cp_num].opaque, + cp_info, dest, operand, GETPC()); + return 0; +} + +#else + +void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val) +{ + int op1 = (insn >> 8) & 0xf; + cpu_abort(env, "cp%i insn %08x\n", op1, insn); + return; +} + +uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn) +{ + int op1 = (insn >> 8) & 0xf; + cpu_abort(env, "cp%i insn %08x\n", op1, insn); + return 0; +} + #endif /* FIXME: Pass an axplicit pointer to QF to CPUState, and move saturating @@ -402,14 +407,6 @@ uint32_t HELPER(sar)(uint32_t x, uint32_t i) return (int32_t)x >> shift; } -uint32_t HELPER(ror)(uint32_t x, uint32_t i) -{ - int shift = i & 0xff; - if (shift == 0) - return x; - return (x >> shift) | (x << (32 - shift)); -} - uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i) { int shift = i & 0xff; @@ -470,109 +467,126 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i) } } -uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { - env->QF = 1; - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (res < src1) { - 
env->QF = 1; - res = ~(uint64_t)0; - } - return res; -} - -uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 - src2; - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { - env->QF = 1; - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - if (src1 < src2) { - env->QF = 1; - res = 0; - } else { - res = src1 - src2; +void HELPER(neon_vldst_all)(uint32_t insn) +{ +#if defined(CONFIG_USER_ONLY) +#define LDB(addr) ldub(addr) +#define LDW(addr) lduw(addr) +#define LDL(addr) ldl(addr) +#define LDQ(addr) ldq(addr) +#define STB(addr, val) stb(addr, val) +#define STW(addr, val) stw(addr, val) +#define STL(addr, val) stl(addr, val) +#define STQ(addr, val) stq(addr, val) +#else + int user = cpu_mmu_index(env); +#define LDB(addr) slow_ldb_mmu(addr, user, GETPC()) +#define LDW(addr) slow_ldw_mmu(addr, user, GETPC()) +#define LDL(addr) slow_ldl_mmu(addr, user, GETPC()) +#define LDQ(addr) slow_ldq_mmu(addr, user, GETPC()) +#define STB(addr, val) slow_stb_mmu(addr, val, user, GETPC()) +#define STW(addr, val) slow_stw_mmu(addr, val, user, GETPC()) +#define STL(addr, val) slow_stl_mmu(addr, val, user, GETPC()) +#define STQ(addr, val) slow_stq_mmu(addr, val, user, GETPC()) +#endif + static const struct { + int nregs; + int interleave; + int spacing; + } neon_ls_element_type[11] = { + {4, 4, 1}, + {4, 4, 2}, + {4, 1, 1}, + {4, 2, 1}, + {3, 3, 1}, + {3, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {2, 2, 1}, + {2, 2, 2}, + {2, 1, 1} + }; + + const int op = (insn >> 8) & 0xf; + const int size = (insn >> 6) & 3; + int rd = ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10); + const int rn = (insn >> 16) & 0xf; + const int load = (insn & (1 << 21)) != 0; + const int nregs = neon_ls_element_type[op].nregs; + const int interleave = neon_ls_element_type[op].interleave; + const int spacing = neon_ls_element_type[op].spacing; + uint32_t addr = env->regs[rn]; + const int stride = (1 << size) * interleave; + int i, reg; + uint64_t tmp64; + + for (reg = 0; reg < nregs; reg++) { + if (interleave > 2 || (interleave == 2 && nregs == 2)) { + addr = env->regs[rn] + (1 << size) * reg; + } else if (interleave == 2 && nregs == 4 && reg == 2) { + addr = env->regs[rn] + (1 << size); + } + switch (size) { + case 3: + if (load) { + env->vfp.regs[rd] = make_float64(LDQ(addr)); + } else { + STQ(addr, float64_val(env->vfp.regs[rd])); + } + addr += stride; + break; + case 2: + if (load) { + tmp64 = (uint32_t)LDL(addr); + addr += stride; + tmp64 |= (uint64_t)LDL(addr) << 32; + addr += stride; + env->vfp.regs[rd] = make_float64(tmp64); + } else { + tmp64 = float64_val(env->vfp.regs[rd]); + STL(addr, tmp64); + addr += stride; + STL(addr, tmp64 >> 32); + addr += stride; + } + break; + case 1: + if (load) { + tmp64 = 0ull; + for (i = 0; i < 4; i++, addr += stride) { + tmp64 |= (uint64_t)LDW(addr) << (i * 16); + } + env->vfp.regs[rd] = make_float64(tmp64); + } else { + tmp64 = float64_val(env->vfp.regs[rd]); + for (i = 0; i < 4; i++, addr += stride, tmp64 >>= 16) { + STW(addr, tmp64); + } + } + break; + case 0: + if (load) { + tmp64 = 0ull; + for (i = 0; i < 8; i++, addr += stride) { + tmp64 |= (uint64_t)LDB(addr) << (i * 8); + } + env->vfp.regs[rd] = make_float64(tmp64); + } else { + tmp64 = float64_val(env->vfp.regs[rd]); + for (i = 0; i < 8; i++, addr += stride, tmp64 >>= 8) { + STB(addr, tmp64); + } + } + break; + } + rd += spacing; } - return res; -} - -/* These need to return a pair 
of value, so still use T0/T1. */ -/* Transpose. Argument order is rather strange to avoid special casing - the tranlation code. - On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */ -void HELPER(neon_trn_u8)(void) -{ - uint32_t rd; - uint32_t rm; - rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff); - rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00); - T0 = rd; - T1 = rm; -} - -void HELPER(neon_trn_u16)(void) -{ - uint32_t rd; - uint32_t rm; - rd = (T0 << 16) | (T1 & 0xffff); - rm = (T1 >> 16) | (T0 & 0xffff0000); - T0 = rd; - T1 = rm; -} - -/* Worker routines for zip and unzip. */ -void HELPER(neon_unzip_u8)(void) -{ - uint32_t rd; - uint32_t rm; - rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00) - | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000); - rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00) - | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000); - T0 = rd; - T1 = rm; -} - -void HELPER(neon_zip_u8)(void) -{ - uint32_t rd; - uint32_t rm; - rd = (T0 & 0xff) | ((T1 << 8) & 0xff00) - | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000); - rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00) - | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000); - T0 = rd; - T1 = rm; -} - -void HELPER(neon_zip_u16)(void) -{ - uint32_t tmp; - - tmp = (T0 & 0xffff) | (T1 << 16); - T1 = (T1 & 0xffff0000) | (T0 >> 16); - T0 = tmp; +#undef LDB +#undef LDW +#undef LDL +#undef LDQ +#undef STB +#undef STW +#undef STL +#undef STQ } diff --git a/target-arm/translate.c b/target-arm/translate.c index 1e189f8..05712b8 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -30,10 +30,14 @@ #include "tcg-op.h" #include "qemu-log.h" -#include "helpers.h" +#include "helper.h" #define GEN_HELPER 1 -#include "helpers.h" +#include "helper.h" +#define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T) +#define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5) +/* currently all emulated v5 cores are also v5TE, so don't bother */ +#define ENABLE_ARCH_5TE arm_feature(env, ARM_FEATURE_V5) #define ENABLE_ARCH_5J 0 #define ENABLE_ARCH_6 arm_feature(env, ARM_FEATURE_V6) #define ENABLE_ARCH_6K arm_feature(env, ARM_FEATURE_V6K) @@ -53,13 +57,15 @@ typedef struct DisasContext { /* Thumb-2 condtional execution bits. */ int condexec_mask; int condexec_cond; - int condexec_mask_prev; /* mask at start of instruction/block */ struct TranslationBlock *tb; int singlestep_enabled; int thumb; #if !defined(CONFIG_USER_ONLY) int user; #endif + int vfp_enabled; + int vec_len; + int vec_stride; #ifdef CONFIG_MEMCHECK int search_pc; #endif @@ -67,6 +73,8 @@ typedef struct DisasContext { #include "translate-android.h" +static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE]; + #if defined(CONFIG_USER_ONLY) #define IS_USER(s) 1 #else @@ -77,74 +85,62 @@ typedef struct DisasContext { conditional executions state has been updated. */ #define DISAS_WFI 4 #define DISAS_SWI 5 +#define DISAS_SMC 6 static TCGv_ptr cpu_env; /* We reuse the same 64-bit temporaries for efficiency. */ static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; +static TCGv_i32 cpu_R[16]; +static TCGv_i32 cpu_exclusive_addr; +static TCGv_i32 cpu_exclusive_val; +static TCGv_i32 cpu_exclusive_high; +#ifdef CONFIG_USER_ONLY +static TCGv_i32 cpu_exclusive_test; +static TCGv_i32 cpu_exclusive_info; +#endif /* FIXME: These should be removed. 
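
Looking back at the op_addsub.h hunk earlier in this diff: the fix pairs each halfword of the first operand with the opposite halfword of the second, which is what the ARM ASX/SAX instructions require. A plain-C model, with the helpers' GE-flag bookkeeping omitted (sasx/ssax are illustrative names):

    #include <stdint.h>
    #include <stdio.h>

    /* SASX: low = a.lo - b.hi, high = a.hi + b.lo (halfwords exchanged).
     * SSAX: low = a.lo + b.hi, high = a.hi - b.lo. */
    static uint32_t sasx(uint32_t a, uint32_t b)
    {
        uint16_t lo = (uint16_t)a - (uint16_t)(b >> 16);
        uint16_t hi = (uint16_t)(a >> 16) + (uint16_t)b;
        return lo | ((uint32_t)hi << 16);
    }

    static uint32_t ssax(uint32_t a, uint32_t b)
    {
        uint16_t lo = (uint16_t)a + (uint16_t)(b >> 16);
        uint16_t hi = (uint16_t)(a >> 16) - (uint16_t)b;
        return lo | ((uint32_t)hi << 16);
    }

    int main(void)
    {
        /* a = 0x0004_0003, b = 0x0002_0001:
         * SASX -> lo = 3 - 2 = 1, hi = 4 + 1 = 5 -> 0x00050001
         * SSAX -> lo = 3 + 2 = 5, hi = 4 - 1 = 3 -> 0x00030005 */
        printf("%08x %08x\n", sasx(0x00040003, 0x00020001),
                              ssax(0x00040003, 0x00020001));
        return 0;
    }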
*/ -static TCGv cpu_T[2]; static TCGv cpu_F0s, cpu_F1s; static TCGv_i64 cpu_F0d, cpu_F1d; -#define ICOUNT_TEMP cpu_T[0] #include "gen-icount.h" +static const char *regnames[] = + { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; + /* initialize TCG globals. */ void arm_translate_init(void) { + int i; + cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); - cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0"); - cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1"); + for (i = 0; i < 16; i++) { + cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, regs[i]), + regnames[i]); + } + cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, exclusive_addr), "exclusive_addr"); + cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, exclusive_val), "exclusive_val"); + cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, exclusive_high), "exclusive_high"); +#ifdef CONFIG_USER_ONLY + cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, exclusive_test), "exclusive_test"); + cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUState, exclusive_info), "exclusive_info"); +#endif #define GEN_HELPER 2 -#include "helpers.h" -} - -/* The code generator doesn't like lots of temporaries, so maintain our own - cache for reuse within a function. */ -#define MAX_TEMPS 8 -static int num_temps; -static TCGv temps[MAX_TEMPS]; - -/* Allocate a temporary variable. */ -static TCGv_i32 new_tmp(void) -{ - TCGv tmp; - if (num_temps == MAX_TEMPS) - abort(); - - if (GET_TCGV_I32(temps[num_temps])) - return temps[num_temps++]; - - tmp = tcg_temp_new_i32(); - temps[num_temps++] = tmp; - return tmp; -} - -/* Release a temporary variable. */ -static void dead_tmp(TCGv tmp) -{ - int i; - num_temps--; - i = num_temps; - if (TCGV_EQUAL(temps[i], tmp)) - return; - - /* Shuffle this temp to the last slot. */ - while (!TCGV_EQUAL(temps[i], tmp)) - i--; - while (i < num_temps) { - temps[i] = temps[i + 1]; - i++; - } - temps[i] = tmp; +#include "helper.h" } static inline TCGv load_cpu_offset(int offset) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp, cpu_env, offset); return tmp; } @@ -154,7 +150,7 @@ static inline TCGv load_cpu_offset(int offset) static inline void store_cpu_offset(TCGv var, int offset) { tcg_gen_st_i32(var, cpu_env, offset); - dead_tmp(var); + tcg_temp_free_i32(var); } #define store_cpu_field(var, name) \ @@ -172,14 +168,14 @@ static void load_reg_var(DisasContext *s, TCGv var, int reg) addr = (long)s->pc + 4; tcg_gen_movi_i32(var, addr); } else { - tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg])); + tcg_gen_mov_i32(var, cpu_R[reg]); } } /* Create a new temporary and set it to the value of a CPU register. */ static inline TCGv load_reg(DisasContext *s, int reg) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); load_reg_var(s, tmp, reg); return tmp; } @@ -192,38 +188,10 @@ static void store_reg(DisasContext *s, int reg, TCGv var) tcg_gen_andi_i32(var, var, ~1); s->is_jmp = DISAS_JUMP; } - tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg])); - dead_tmp(var); + tcg_gen_mov_i32(cpu_R[reg], var); + tcg_temp_free_i32(var); } - -/* Basic operations. 
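
The cpu_exclusive_addr/cpu_exclusive_val globals registered in arm_translate_init above back QEMU's LDREX/STREX emulation, which checks that the monitored location still holds the loaded value rather than tracking real bus ownership. A toy single-threaded model of that scheme (illustrative only, not the emulator's code paths):

    #include <stdint.h>
    #include <stdio.h>

    /* STREX succeeds only if the address matches the last LDREX and
     * the memory still holds the value LDREX observed. */
    static uint32_t excl_addr = ~0u, excl_val;

    static uint32_t ldrex(uint32_t *mem, uint32_t addr)
    {
        excl_addr = addr;
        excl_val = mem[addr];
        return excl_val;
    }

    static int strex(uint32_t *mem, uint32_t addr, uint32_t val)
    {
        if (excl_addr == addr && mem[addr] == excl_val) {
            mem[addr] = val;
            excl_addr = ~0u;
            return 0;          /* success */
        }
        excl_addr = ~0u;
        return 1;              /* fail: exclusivity lost */
    }

    int main(void)
    {
        uint32_t mem[4] = { 5, 0, 0, 0 };
        uint32_t v = ldrex(mem, 0);
        printf("strex -> %d, mem[0] = %u\n", strex(mem, 0, v + 1),
               (unsigned)mem[0]);
        return 0;
    }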
*/ -#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1]) -#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im) -#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im) - -#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im) -#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0]) - -#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0]) - -#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0]) -#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1]) -#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]); -#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]); - -#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im) -#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im) - /* Value extensions. */ #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var) #define gen_uxth(var) tcg_gen_ext16u_i32(var, var) @@ -233,57 +201,57 @@ static void store_reg(DisasContext *s, int reg, TCGv var) #define gen_sxtb16(var) gen_helper_sxtb16(var, var) #define gen_uxtb16(var) gen_helper_uxtb16(var, var) -#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask)) +static inline void gen_set_cpsr(TCGv var, uint32_t mask) +{ + TCGv tmp_mask = tcg_const_i32(mask); + gen_helper_cpsr_write(var, tmp_mask); + tcg_temp_free_i32(tmp_mask); +} /* Set NZCV flags from the high 4 bits of var. */ #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV) static void gen_exception(int excp) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, excp); gen_helper_exception(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_smul_dual(TCGv a, TCGv b) { - TCGv tmp1 = new_tmp(); - TCGv tmp2 = new_tmp(); + TCGv tmp1 = tcg_temp_new_i32(); + TCGv tmp2 = tcg_temp_new_i32(); tcg_gen_ext16s_i32(tmp1, a); tcg_gen_ext16s_i32(tmp2, b); tcg_gen_mul_i32(tmp1, tmp1, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_sari_i32(a, a, 16); tcg_gen_sari_i32(b, b, 16); tcg_gen_mul_i32(b, b, a); tcg_gen_mov_i32(a, tmp1); - dead_tmp(tmp1); + tcg_temp_free_i32(tmp1); } /* Byteswap each halfword. */ static void gen_rev16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 8); tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff); tcg_gen_shli_i32(var, var, 8); tcg_gen_andi_i32(var, var, 0xff00ff00); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Byteswap low halfword and sign extend. 
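
gen_rev16 above byteswaps both halfwords of a word with two masked shifts; in plain C the same dance looks like this (rev16 here is a standalone sketch, not the TCG helper):

    #include <stdint.h>
    #include <stdio.h>

    /* Swap the two bytes inside each 16-bit half of the word, using
     * the same mask-and-shift pattern as gen_rev16 above. */
    static uint32_t rev16(uint32_t x)
    {
        return ((x >> 8) & 0x00ff00ff) | ((x << 8) & 0xff00ff00);
    }

    int main(void)
    {
        printf("%08x\n", rev16(0x11223344));   /* 22114433 */
        return 0;
    }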
*/ static void gen_revsh(TCGv var) { - TCGv tmp = new_tmp(); - tcg_gen_shri_i32(tmp, var, 8); - tcg_gen_andi_i32(tmp, tmp, 0x00ff); - tcg_gen_shli_i32(var, var, 8); - tcg_gen_ext8s_i32(var, var); - tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_gen_ext16u_i32(var, var); + tcg_gen_bswap16_i32(var, var); + tcg_gen_ext16s_i32(var, var); } /* Unsigned bitfield extract. */ @@ -318,11 +286,32 @@ static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask) tcg_gen_or_i32(dest, base, val); } -/* Round the top 32 bits of a 64-bit value. */ -static void gen_roundqd(TCGv a, TCGv b) +/* Return (b << 32) + a. Mark inputs as dead */ +static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b) { - tcg_gen_shri_i32(a, a, 31); - tcg_gen_add_i32(a, a, b); + TCGv_i64 tmp64 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tmp64, b); + tcg_temp_free_i32(b); + tcg_gen_shli_i64(tmp64, tmp64, 32); + tcg_gen_add_i64(a, tmp64, a); + + tcg_temp_free_i64(tmp64); + return a; +} + +/* Return (b << 32) - a. Mark inputs as dead. */ +static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b) +{ + TCGv_i64 tmp64 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tmp64, b); + tcg_temp_free_i32(b); + tcg_gen_shli_i64(tmp64, tmp64, 32); + tcg_gen_sub_i64(a, tmp64, a); + + tcg_temp_free_i64(tmp64); + return a; } /* FIXME: Most targets have native widening multiplication. @@ -334,10 +323,11 @@ static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b) TCGv_i64 tmp2 = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(tmp1, a); - dead_tmp(a); + tcg_temp_free_i32(a); tcg_gen_extu_i32_i64(tmp2, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_mul_i64(tmp1, tmp1, tmp2); + tcg_temp_free_i64(tmp2); return tmp1; } @@ -347,50 +337,22 @@ static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b) TCGv_i64 tmp2 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp1, a); - dead_tmp(a); + tcg_temp_free_i32(a); tcg_gen_ext_i32_i64(tmp2, b); - dead_tmp(b); + tcg_temp_free_i32(b); tcg_gen_mul_i64(tmp1, tmp1, tmp2); + tcg_temp_free_i64(tmp2); return tmp1; } -/* Unsigned 32x32->64 multiply. */ -static void gen_op_mull_T0_T1(void) -{ - TCGv_i64 tmp1 = tcg_temp_new_i64(); - TCGv_i64 tmp2 = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(tmp1, cpu_T[0]); - tcg_gen_extu_i32_i64(tmp2, cpu_T[1]); - tcg_gen_mul_i64(tmp1, tmp1, tmp2); - tcg_gen_trunc_i64_i32(cpu_T[0], tmp1); - tcg_gen_shri_i64(tmp1, tmp1, 32); - tcg_gen_trunc_i64_i32(cpu_T[1], tmp1); -} - -/* Signed 32x32->64 multiply. */ -static void gen_imull(TCGv a, TCGv b) -{ - TCGv_i64 tmp1 = tcg_temp_new_i64(); - TCGv_i64 tmp2 = tcg_temp_new_i64(); - - tcg_gen_ext_i32_i64(tmp1, a); - tcg_gen_ext_i32_i64(tmp2, b); - tcg_gen_mul_i64(tmp1, tmp1, tmp2); - tcg_gen_trunc_i64_i32(a, tmp1); - tcg_gen_shri_i64(tmp1, tmp1, 32); - tcg_gen_trunc_i64_i32(b, tmp1); -} -#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1]) - /* Swap low and high halfwords. */ static void gen_swap_half(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 16); tcg_gen_shli_i32(var, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. 
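
The comment above describes gen_add16, which follows: two independent 16-bit adds are packed into one 32-bit add by pre-clearing bit 15 of both operands, so the low half cannot carry into the high half, and then patching the true bit-15 sum back in with xor. The same computation in plain C (add16_packed is an illustrative name):

    #include <stdint.h>
    #include <stdio.h>

    /* Two independent 16-bit adds in one 32-bit register. With bit 15
     * cleared, each masked halfword is at most 0x7fff, so their sum
     * never carries across the halfword boundary; the xor restores
     * the correct bit 15 (a15 ^ b15 ^ carry_from_bit14). */
    static uint32_t add16_packed(uint32_t a, uint32_t b)
    {
        uint32_t bit15 = (a ^ b) & 0x8000;
        uint32_t sum = (a & ~0x8000u) + (b & ~0x8000u);
        return sum ^ bit15;
    }

    int main(void)
    {
        /* low half: 0xffff + 0x0001 wraps to 0x0000; high half: 1 + 1 */
        printf("%08x\n", add16_packed(0x0001ffff, 0x00010001)); /* 00020000 */
        return 0;
    }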
@@ -402,15 +364,15 @@ static void gen_swap_half(TCGv var) static void gen_add16(TCGv t0, TCGv t1) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_xor_i32(tmp, t0, t1); tcg_gen_andi_i32(tmp, tmp, 0x8000); tcg_gen_andi_i32(t0, t0, ~0x8000); tcg_gen_andi_i32(t1, t1, ~0x8000); tcg_gen_add_i32(t0, t0, t1); tcg_gen_xor_i32(t0, t0, tmp); - dead_tmp(tmp); - dead_tmp(t1); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(t1); } #define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF)) @@ -418,10 +380,10 @@ static void gen_add16(TCGv t0, TCGv t1) /* Set CF to the top bit of var. */ static void gen_set_CF_bit31(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_shri_i32(tmp, var, 31); gen_set_CF(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Set N and Z flags from var. */ @@ -432,13 +394,13 @@ static inline void gen_logic_CC(TCGv var) } /* T0 += T1 + CF. */ -static void gen_adc_T0_T1(void) +static void gen_adc(TCGv t0, TCGv t1) { TCGv tmp; - gen_op_addl_T0_T1(); + tcg_gen_add_i32(t0, t0, t1); tmp = load_cpu_field(CF); - tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp); - dead_tmp(tmp); + tcg_gen_add_i32(t0, t0, tmp); + tcg_temp_free_i32(tmp); } /* dest = T0 + T1 + CF. */ @@ -448,7 +410,7 @@ static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1) tcg_gen_add_i32(dest, t0, t1); tmp = load_cpu_field(CF); tcg_gen_add_i32(dest, dest, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* dest = T0 - T1 + CF - 1. */ @@ -459,48 +421,15 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1) tmp = load_cpu_field(CF); tcg_gen_add_i32(dest, dest, tmp); tcg_gen_subi_i32(dest, dest, 1); - dead_tmp(tmp); -} - -#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1]) -#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0]) - -/* T0 &= ~T1. Clobbers T1. */ -/* FIXME: Implement bic natively. */ -static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1) -{ - TCGv tmp = new_tmp(); - tcg_gen_not_i32(tmp, t1); - tcg_gen_and_i32(dest, t0, tmp); - dead_tmp(tmp); -} -static inline void gen_op_bicl_T0_T1(void) -{ - gen_op_notl_T1(); - gen_op_andl_T0_T1(); + tcg_temp_free_i32(tmp); } /* FIXME: Implement this natively. */ #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1) -/* FIXME: Implement this natively. */ -static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i) -{ - TCGv tmp; - - if (i == 0) - return; - - tmp = new_tmp(); - tcg_gen_shri_i32(tmp, t1, i); - tcg_gen_shli_i32(t1, t1, 32 - i); - tcg_gen_or_i32(t0, t1, tmp); - dead_tmp(tmp); -} - static void shifter_out_im(TCGv var, int shift) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); if (shift == 0) { tcg_gen_andi_i32(tmp, var, 1); } else { @@ -509,7 +438,7 @@ static void shifter_out_im(TCGv var, int shift) tcg_gen_andi_i32(tmp, tmp, 1); } gen_set_CF(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } /* Shift by immediate. Includes special handling for shift == 0. 
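
The hand-rolled tcg_gen_rori_i32 deleted above composed a rotate from two shifts and an OR because TCG then lacked a native rotate; the patch switches to TCG's own rotate ops. The plain-C composition, with the count masked to dodge the undefined x << 32 that a count of 0 would produce, looks like this (ror32 is an illustrative name):

    #include <stdint.h>
    #include <stdio.h>

    /* Rotate right by n, n masked to 0..31 the same way the patched
     * gen_arm_shift_reg masks a register-specified rotate amount. */
    static uint32_t ror32(uint32_t x, unsigned n)
    {
        n &= 31;
        return n ? (x >> n) | (x << (32 - n)) : x;
    }

    int main(void)
    {
        printf("%08x %08x\n", ror32(0x80000001, 1), ror32(0x12345678, 0));
        return 0;
    }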
*/ @@ -549,7 +478,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags) if (shift != 0) { if (flags) shifter_out_im(var, shift - 1); - tcg_gen_rori_i32(var, var, shift); break; + tcg_gen_rotri_i32(var, var, shift); break; } else { TCGv tmp = load_cpu_field(CF); if (flags) @@ -557,7 +486,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags) tcg_gen_shri_i32(var, var, 1); tcg_gen_shli_i32(tmp, tmp, 31); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } } }; @@ -577,10 +506,11 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop, case 0: gen_helper_shl(var, var, shift); break; case 1: gen_helper_shr(var, var, shift); break; case 2: gen_helper_sar(var, var, shift); break; - case 3: gen_helper_ror(var, var, shift); break; + case 3: tcg_gen_andi_i32(shift, shift, 0x1f); + tcg_gen_rotr_i32(var, var, shift); break; } } - dead_tmp(shift); + tcg_temp_free_i32(shift); } #define PAS_OP(pfx) \ @@ -602,11 +532,13 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b) tmp = tcg_temp_new_ptr(); tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); PAS_OP(s) + tcg_temp_free_ptr(tmp); break; case 5: tmp = tcg_temp_new_ptr(); tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); PAS_OP(u) + tcg_temp_free_ptr(tmp); break; #undef gen_pas_helper #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) @@ -629,7 +561,7 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b) /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */ #define PAS_OP(pfx) \ - switch (op2) { \ + switch (op1) { \ case 0: gen_pas_helper(glue(pfx,add8)); break; \ case 1: gen_pas_helper(glue(pfx,add16)); break; \ case 2: gen_pas_helper(glue(pfx,addsubx)); break; \ @@ -641,17 +573,19 @@ static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b) { TCGv_ptr tmp; - switch (op1) { + switch (op2) { #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp) case 0: tmp = tcg_temp_new_ptr(); tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); PAS_OP(s) + tcg_temp_free_ptr(tmp); break; case 4: tmp = tcg_temp_new_ptr(); tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); PAS_OP(u) + tcg_temp_free_ptr(tmp); break; #undef gen_pas_helper #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) @@ -715,7 +649,7 @@ static void gen_test_cc(int cc, int label) inv = gen_new_label(); tmp = load_cpu_field(CF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label); gen_set_label(inv); @@ -723,7 +657,7 @@ static void gen_test_cc(int cc, int label) case 9: /* ls: !C || Z */ tmp = load_cpu_field(CF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); break; @@ -731,43 +665,43 @@ static void gen_test_cc(int cc, int label) tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); break; case 11: /* lt: N != V -> N ^ V != 0 */ tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); break; case 12: /* gt: !Z && N == V */ inv = gen_new_label(); tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, 
inv); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); gen_set_label(inv); break; case 13: /* le: Z || N != V */ tmp = load_cpu_field(ZF); tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = load_cpu_field(VF); tmp2 = load_cpu_field(NF); tcg_gen_xor_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); break; default: fprintf(stderr, "Bad condition code 0x%x\n", cc); abort(); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static const uint8_t table_logic_cc[16] = { @@ -795,35 +729,22 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) TCGv tmp; s->is_jmp = DISAS_UPDATE; - tmp = new_tmp(); if (s->thumb != (addr & 1)) { + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, addr & 1); tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb)); + tcg_temp_free_i32(tmp); } - tcg_gen_movi_i32(tmp, addr & ~1); - tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15])); - dead_tmp(tmp); + tcg_gen_movi_i32(cpu_R[15], addr & ~1); } /* Set PC and Thumb state from var. var is marked as dead. */ static inline void gen_bx(DisasContext *s, TCGv var) { - TCGv tmp; - s->is_jmp = DISAS_UPDATE; - tmp = new_tmp(); - tcg_gen_andi_i32(tmp, var, 1); - store_cpu_field(tmp, thumb); - tcg_gen_andi_i32(var, var, ~1); - store_cpu_field(var, regs[15]); -} - -/* TODO: This should be removed. Use gen_bx instead. */ -static inline void gen_bx_T0(DisasContext *s) -{ - TCGv tmp = new_tmp(); - tcg_gen_mov_i32(tmp, cpu_T[0]); - gen_bx(s, tmp); + tcg_gen_andi_i32(cpu_R[15], var, ~1); + tcg_gen_andi_i32(var, var, 1); + store_cpu_field(var, thumb); } /* Variant of store_reg which uses branch&exchange logic when storing @@ -839,105 +760,92 @@ static inline void store_reg_bx(CPUState *env, DisasContext *s, } } +/* Variant of store_reg which uses branch&exchange logic when storing + * to r15 in ARM architecture v5T and above. This is used for storing + * the results of a LDR/LDM/POP into r15, and corresponds to the cases + * in the ARM ARM which use the LoadWritePC() pseudocode function. 
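
store_reg_from_load, defined just below, routes loads that target r15 through gen_bx so that bit 0 of the loaded value selects the Thumb state, as the ARM ARM's LoadWritePC() pseudocode requires on v5T and later. A toy model of that rule (simplified; load_write_pc and struct core are illustrative, not QEMU types):

    #include <stdint.h>
    #include <stdio.h>

    /* On v5T and later, a load into r15 interworks like BX: bit 0
     * picks the instruction set and is cleared from the PC. Earlier
     * cores take the plain store_reg path, which only clears bit 0. */
    struct core { uint32_t pc; int thumb; };

    static void load_write_pc(struct core *c, int arch_ge_v5, uint32_t val)
    {
        if (arch_ge_v5) {
            c->thumb = val & 1;
            c->pc = val & ~1u;
        } else {
            c->pc = val & ~1u;   /* thumb state unchanged */
        }
    }

    int main(void)
    {
        struct core c = { 0, 0 };
        load_write_pc(&c, 1, 0x8001);
        printf("pc=%08x thumb=%d\n", c.pc, c.thumb);
        return 0;
    }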
*/ +static inline void store_reg_from_load(CPUState *env, DisasContext *s, + int reg, TCGv var) +{ + if (reg == 15 && ENABLE_ARCH_5) { + gen_bx(s, var); + } else { + store_reg(s, reg, var); + } +} + +static inline void gen_smc(CPUState *env, DisasContext *s) +{ + tcg_gen_movi_i32(cpu_R[15], s->pc); + s->is_jmp = DISAS_SMC; +} + static inline TCGv gen_ld8s(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld8s(tmp, addr, index); return tmp; } static inline TCGv gen_ld8u(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld8u(tmp, addr, index); return tmp; } static inline TCGv gen_ld16s(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld16s(tmp, addr, index); return tmp; } static inline TCGv gen_ld16u(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld16u(tmp, addr, index); return tmp; } static inline TCGv gen_ld32(TCGv addr, int index) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld32u(tmp, addr, index); return tmp; } +static inline TCGv_i64 gen_ld64(TCGv addr, int index) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(tmp, addr, index); + return tmp; +} static inline void gen_st8(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st8(val, addr, index); - dead_tmp(val); + tcg_temp_free_i32(val); } static inline void gen_st16(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st16(val, addr, index); - dead_tmp(val); + tcg_temp_free_i32(val); } static inline void gen_st32(TCGv val, TCGv addr, int index) { tcg_gen_qemu_st32(val, addr, index); - dead_tmp(val); -} - -static inline void gen_movl_T0_reg(DisasContext *s, int reg) -{ - load_reg_var(s, cpu_T[0], reg); + tcg_temp_free_i32(val); } - -static inline void gen_movl_T1_reg(DisasContext *s, int reg) +static inline void gen_st64(TCGv_i64 val, TCGv addr, int index) { - load_reg_var(s, cpu_T[1], reg); -} - -static inline void gen_movl_T2_reg(DisasContext *s, int reg) -{ - load_reg_var(s, cpu_T[2], reg); + tcg_gen_qemu_st64(val, addr, index); + tcg_temp_free_i64(val); } static inline void gen_set_pc_im(uint32_t val) { - TCGv tmp = new_tmp(); - tcg_gen_movi_i32(tmp, val); - store_cpu_field(tmp, regs[15]); -} - -static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t) -{ - TCGv tmp; - if (reg == 15) { - tmp = new_tmp(); - tcg_gen_andi_i32(tmp, cpu_T[t], ~1); - } else { - tmp = cpu_T[t]; - } - tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg])); - if (reg == 15) { - dead_tmp(tmp); - s->is_jmp = DISAS_JUMP; - } -} - -static inline void gen_movl_reg_T0(DisasContext *s, int reg) -{ - gen_movl_reg_TN(s, reg, 0); -} - -static inline void gen_movl_reg_T1(DisasContext *s, int reg) -{ - gen_movl_reg_TN(s, reg, 1); + tcg_gen_movi_i32(cpu_R[15], val); } /* Force a TB lookup after an instruction that changes the CPU state. 
*/ static inline void gen_lookup_tb(DisasContext *s) { - gen_op_movl_T0_im(s->pc); - gen_movl_reg_T0(s, 15); + tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); s->is_jmp = DISAS_UPDATE; } @@ -965,7 +873,7 @@ static inline void gen_add_data_offset(DisasContext *s, unsigned int insn, tcg_gen_sub_i32(var, var, offset); else tcg_gen_add_i32(var, var, offset); - dead_tmp(offset); + tcg_temp_free_i32(offset); } } @@ -993,7 +901,7 @@ static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn, tcg_gen_sub_i32(var, var, offset); else tcg_gen_add_i32(var, var, offset); - dead_tmp(offset); + tcg_temp_free_i32(offset); } } @@ -1013,6 +921,26 @@ VFP_OP2(div) #undef VFP_OP2 +static inline void gen_vfp_F1_mul(int dp) +{ + /* Like gen_vfp_mul() but put result in F1 */ + if (dp) { + gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env); + } else { + gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env); + } +} + +static inline void gen_vfp_F1_neg(int dp) +{ + /* Like gen_vfp_neg() but put result in F1 */ + if (dp) { + gen_helper_vfp_negd(cpu_F1d, cpu_F0d); + } else { + gen_helper_vfp_negs(cpu_F1s, cpu_F0s); + } +} + static inline void gen_vfp_abs(int dp) { if (dp) @@ -1061,61 +989,73 @@ static inline void gen_vfp_F1_ld0(int dp) tcg_gen_movi_i32(cpu_F1s, 0); } -static inline void gen_vfp_uito(int dp) -{ - if (dp) - gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_sito(int dp) -{ - if (dp) - gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env); - else - gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env); -} - -static inline void gen_vfp_toui(int dp) -{ - if (dp) - gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_ITOF(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = tcg_temp_new_ptr(); \ + int offset; \ + if (neon) { \ + offset = offsetof(CPUState, vfp.standard_fp_status); \ + } else { \ + offset = offsetof(CPUState, vfp.fp_status); \ + } \ + tcg_gen_addi_ptr(statusptr, cpu_env, offset); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_touiz(int dp) -{ - if (dp) - gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_ITOF(uito) +VFP_GEN_ITOF(sito) +#undef VFP_GEN_ITOF -static inline void gen_vfp_tosi(int dp) -{ - if (dp) - gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env); +#define VFP_GEN_FTOI(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = tcg_temp_new_ptr(); \ + int offset; \ + if (neon) { \ + offset = offsetof(CPUState, vfp.standard_fp_status); \ + } else { \ + offset = offsetof(CPUState, vfp.fp_status); \ + } \ + tcg_gen_addi_ptr(statusptr, cpu_env, offset); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ } -static inline void gen_vfp_tosiz(int dp) -{ - if (dp) - gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env); - else - gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env); -} +VFP_GEN_FTOI(toui) +VFP_GEN_FTOI(touiz) +VFP_GEN_FTOI(tosi) +VFP_GEN_FTOI(tosiz) +#undef VFP_GEN_FTOI #define VFP_GEN_FIX(name) \ -static inline void 
gen_vfp_##name(int dp, int shift) \ +static inline void gen_vfp_##name(int dp, int shift, int neon) \ { \ - if (dp) \ - gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\ - else \ - gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\ + TCGv tmp_shift = tcg_const_i32(shift); \ + TCGv_ptr statusptr = tcg_temp_new_ptr(); \ + int offset; \ + if (neon) { \ + offset = offsetof(CPUState, vfp.standard_fp_status); \ + } else { \ + offset = offsetof(CPUState, vfp.fp_status); \ + } \ + tcg_gen_addi_ptr(statusptr, cpu_env, offset); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \ + } \ + tcg_temp_free_i32(tmp_shift); \ + tcg_temp_free_ptr(statusptr); \ } VFP_GEN_FIX(tosh) VFP_GEN_FIX(tosl) @@ -1127,20 +1067,20 @@ VFP_GEN_FIX(uhto) VFP_GEN_FIX(ulto) #undef VFP_GEN_FIX -static inline void gen_vfp_ld(DisasContext *s, int dp) +static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr) { if (dp) - tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s)); + tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s)); else - tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s)); + tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s)); } -static inline void gen_vfp_st(DisasContext *s, int dp) +static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr) { if (dp) - tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s)); + tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s)); else - tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s)); + tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s)); } static inline long @@ -1167,17 +1107,9 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } -/* FIXME: Remove these. */ -#define neon_T0 cpu_T[0] -#define neon_T1 cpu_T[1] -#define NEON_GET_REG(T, reg, n) \ - tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) -#define NEON_SET_REG(T, reg, n) \ - tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n)) - static TCGv neon_load_reg(int reg, int pass) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass)); return tmp; } @@ -1185,7 +1117,7 @@ static TCGv neon_load_reg(int reg, int pass) static void neon_store_reg(int reg, int pass, TCGv var) { tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); - dead_tmp(var); + tcg_temp_free_i32(var); } static inline void neon_load_reg64(TCGv_i64 var, int reg) @@ -1239,19 +1171,17 @@ static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg])); } -static inline void gen_op_iwmmxt_movl_wCx_T0(int reg) -{ - tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); -} - -static inline void gen_op_iwmmxt_movl_T0_wCx(int reg) +static inline TCGv iwmmxt_load_creg(int reg) { - tcg_gen_ld_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); + TCGv var = tcg_temp_new_i32(); + tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); + return var; } -static inline void gen_op_iwmmxt_movl_T1_wCx(int reg) +static inline void iwmmxt_store_creg(int reg, TCGv var) { - tcg_gen_ld_i32(cpu_T[1], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); + tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg])); + tcg_temp_free_i32(var); } static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) @@ -1289,22 +1219,15 @@ static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \ } -#define 
IWMMXT_OP_ENV(name) \ -static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ -{ \ - iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \ -} - -#define IWMMXT_OP_ENV_SIZE(name) \ -IWMMXT_OP_ENV(name##b) \ -IWMMXT_OP_ENV(name##w) \ -IWMMXT_OP_ENV(name##l) +#define IWMMXT_OP_SIZE(name) \ +IWMMXT_OP(name##b) \ +IWMMXT_OP(name##w) \ +IWMMXT_OP(name##l) -#define IWMMXT_OP_ENV1(name) \ +#define IWMMXT_OP_1(name) \ static inline void gen_op_iwmmxt_##name##_M0(void) \ { \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_M0); \ } IWMMXT_OP(maddsq) @@ -1318,100 +1241,51 @@ IWMMXT_OP(muluhw) IWMMXT_OP(macsw) IWMMXT_OP(macuw) -IWMMXT_OP_ENV_SIZE(unpackl) -IWMMXT_OP_ENV_SIZE(unpackh) - -IWMMXT_OP_ENV1(unpacklub) -IWMMXT_OP_ENV1(unpackluw) -IWMMXT_OP_ENV1(unpacklul) -IWMMXT_OP_ENV1(unpackhub) -IWMMXT_OP_ENV1(unpackhuw) -IWMMXT_OP_ENV1(unpackhul) -IWMMXT_OP_ENV1(unpacklsb) -IWMMXT_OP_ENV1(unpacklsw) -IWMMXT_OP_ENV1(unpacklsl) -IWMMXT_OP_ENV1(unpackhsb) -IWMMXT_OP_ENV1(unpackhsw) -IWMMXT_OP_ENV1(unpackhsl) - -IWMMXT_OP_ENV_SIZE(cmpeq) -IWMMXT_OP_ENV_SIZE(cmpgtu) -IWMMXT_OP_ENV_SIZE(cmpgts) - -IWMMXT_OP_ENV_SIZE(mins) -IWMMXT_OP_ENV_SIZE(minu) -IWMMXT_OP_ENV_SIZE(maxs) -IWMMXT_OP_ENV_SIZE(maxu) - -IWMMXT_OP_ENV_SIZE(subn) -IWMMXT_OP_ENV_SIZE(addn) -IWMMXT_OP_ENV_SIZE(subu) -IWMMXT_OP_ENV_SIZE(addu) -IWMMXT_OP_ENV_SIZE(subs) -IWMMXT_OP_ENV_SIZE(adds) - -IWMMXT_OP_ENV(avgb0) -IWMMXT_OP_ENV(avgb1) -IWMMXT_OP_ENV(avgw0) -IWMMXT_OP_ENV(avgw1) +IWMMXT_OP_SIZE(unpackl) +IWMMXT_OP_SIZE(unpackh) + +IWMMXT_OP_1(unpacklub) +IWMMXT_OP_1(unpackluw) +IWMMXT_OP_1(unpacklul) +IWMMXT_OP_1(unpackhub) +IWMMXT_OP_1(unpackhuw) +IWMMXT_OP_1(unpackhul) +IWMMXT_OP_1(unpacklsb) +IWMMXT_OP_1(unpacklsw) +IWMMXT_OP_1(unpacklsl) +IWMMXT_OP_1(unpackhsb) +IWMMXT_OP_1(unpackhsw) +IWMMXT_OP_1(unpackhsl) + +IWMMXT_OP_SIZE(cmpeq) +IWMMXT_OP_SIZE(cmpgtu) +IWMMXT_OP_SIZE(cmpgts) + +IWMMXT_OP_SIZE(mins) +IWMMXT_OP_SIZE(minu) +IWMMXT_OP_SIZE(maxs) +IWMMXT_OP_SIZE(maxu) + +IWMMXT_OP_SIZE(subn) +IWMMXT_OP_SIZE(addn) +IWMMXT_OP_SIZE(subu) +IWMMXT_OP_SIZE(addu) +IWMMXT_OP_SIZE(subs) +IWMMXT_OP_SIZE(adds) + +IWMMXT_OP(avgb0) +IWMMXT_OP(avgb1) +IWMMXT_OP(avgw0) +IWMMXT_OP(avgw1) IWMMXT_OP(msadb) -IWMMXT_OP_ENV(packuw) -IWMMXT_OP_ENV(packul) -IWMMXT_OP_ENV(packuq) -IWMMXT_OP_ENV(packsw) -IWMMXT_OP_ENV(packsl) -IWMMXT_OP_ENV(packsq) - -static inline void gen_op_iwmmxt_muladdsl_M0_T0_T1(void) -{ - gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]); -} - -static inline void gen_op_iwmmxt_muladdsw_M0_T0_T1(void) -{ - gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]); -} - -static inline void gen_op_iwmmxt_muladdswl_M0_T0_T1(void) -{ - gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]); -} - -static inline void gen_op_iwmmxt_align_M0_T0_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V1, rn); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, cpu_T[0]); -} - -static inline void gen_op_iwmmxt_insr_M0_T0_T1(int shift) -{ - TCGv tmp = tcg_const_i32(shift); - gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1], tmp); -} - -static inline void gen_op_iwmmxt_extrsb_T0_M0(int shift) -{ - tcg_gen_shri_i64(cpu_M0, cpu_M0, shift); - tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0); - tcg_gen_ext8s_i32(cpu_T[0], cpu_T[0]); -} - -static inline void gen_op_iwmmxt_extrsw_T0_M0(int shift) -{ - tcg_gen_shri_i64(cpu_M0, cpu_M0, shift); - tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0); - tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]); -} - -static inline void 
gen_op_iwmmxt_extru_T0_M0(int shift, uint32_t mask) -{ - tcg_gen_shri_i64(cpu_M0, cpu_M0, shift); - tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0); - if (mask != ~0u) - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask); -} +IWMMXT_OP(packuw) +IWMMXT_OP(packul) +IWMMXT_OP(packuq) +IWMMXT_OP(packsw) +IWMMXT_OP(packsl) +IWMMXT_OP(packsq) static void gen_op_iwmmxt_set_mup(void) { @@ -1431,7 +1305,7 @@ static void gen_op_iwmmxt_set_cup(void) static void gen_op_iwmmxt_setpsr_nz(void) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0); store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]); } @@ -1443,76 +1317,70 @@ static inline void gen_op_iwmmxt_addl_M0_wRn(int rn) tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); } - -static void gen_iwmmxt_movl_T0_T1_wRn(int rn) -{ - iwmmxt_load_reg(cpu_V0, rn); - tcg_gen_trunc_i64_i32(cpu_T[0], cpu_V0); - tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); - tcg_gen_trunc_i64_i32(cpu_T[1], cpu_V0); -} - -static void gen_iwmmxt_movl_wRn_T0_T1(int rn) -{ - tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]); - iwmmxt_store_reg(cpu_V0, rn); -} - -static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn) +static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest) { int rd; uint32_t offset; + TCGv tmp; rd = (insn >> 16) & 0xf; - gen_movl_T1_reg(s, rd); + tmp = load_reg(s, rd); offset = (insn & 0xff) << ((insn >> 7) & 2); if (insn & (1 << 24)) { /* Pre indexed */ if (insn & (1 << 23)) - gen_op_addl_T1_im(offset); + tcg_gen_addi_i32(tmp, tmp, offset); else - gen_op_addl_T1_im(-offset); - + tcg_gen_addi_i32(tmp, tmp, -offset); + tcg_gen_mov_i32(dest, tmp); if (insn & (1 << 21)) - gen_movl_reg_T1(s, rd); + store_reg(s, rd, tmp); + else + tcg_temp_free_i32(tmp); } else if (insn & (1 << 21)) { /* Post indexed */ + tcg_gen_mov_i32(dest, tmp); if (insn & (1 << 23)) - gen_op_movl_T0_im(offset); + tcg_gen_addi_i32(tmp, tmp, offset); else - gen_op_movl_T0_im(- offset); - gen_op_addl_T0_T1(); - gen_movl_reg_T0(s, rd); + tcg_gen_addi_i32(tmp, tmp, -offset); + store_reg(s, rd, tmp); } else if (!(insn & (1 << 23))) return 1; return 0; } -static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask) +static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest) { int rd = (insn >> 0) & 0xf; + TCGv tmp; - if (insn & (1 << 8)) - if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) + if (insn & (1 << 8)) { + if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) { return 1; - else - gen_op_iwmmxt_movl_T0_wCx(rd); - else - gen_iwmmxt_movl_T0_T1_wRn(rd); - - gen_op_movl_T1_im(mask); - gen_op_andl_T0_T1(); + } else { + tmp = iwmmxt_load_creg(rd); + } + } else { + tmp = tcg_temp_new_i32(); + iwmmxt_load_reg(cpu_V0, rd); + tcg_gen_trunc_i64_i32(tmp, cpu_V0); + } + tcg_gen_andi_i32(tmp, tmp, mask); + tcg_gen_mov_i32(dest, tmp); + tcg_temp_free_i32(tmp); return 0; } -/* Disassemble an iwMMXt instruction. Returns nonzero if an error occured +/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred (ie. an undefined instruction). 
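
For reference, a sketch of what the simplified macros now emit: IWMMXT_OP_SIZE(cmpeq), for instance, produces three wrappers of this shape (expansion reconstructed by hand from the IWMMXT_OP definition above, so treat it as illustrative):

    static inline void gen_op_iwmmxt_cmpeqb_M0_wRn(int rn)
    {
        iwmmxt_load_reg(cpu_V1, rn);
        gen_helper_iwmmxt_cmpeqb(cpu_M0, cpu_M0, cpu_V1);
    }

with the cpu_env argument gone now that these helpers no longer take it.
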
*/ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) { int rd, wrd; int rdhi, rdlo, rd0, rd1, i; - TCGv tmp; + TCGv addr; + TCGv tmp, tmp2, tmp3; if ((insn & 0x0e000e00) == 0x0c000000) { if ((insn & 0x0fe00ff0) == 0x0c400000) { @@ -1520,77 +1388,78 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) rdlo = (insn >> 12) & 0xf; rdhi = (insn >> 16) & 0xf; if (insn & ARM_CP_RW_BIT) { /* TMRRC */ - gen_iwmmxt_movl_T0_T1_wRn(wrd); - gen_movl_reg_T0(s, rdlo); - gen_movl_reg_T1(s, rdhi); + iwmmxt_load_reg(cpu_V0, wrd); + tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); } else { /* TMCRR */ - gen_movl_T0_reg(s, rdlo); - gen_movl_T1_reg(s, rdhi); - gen_iwmmxt_movl_wRn_T0_T1(wrd); + tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); + iwmmxt_store_reg(cpu_V0, wrd); gen_op_iwmmxt_set_mup(); } return 0; } wrd = (insn >> 12) & 0xf; - if (gen_iwmmxt_address(s, insn)) + addr = tcg_temp_new_i32(); + if (gen_iwmmxt_address(s, insn, addr)) { + tcg_temp_free_i32(addr); return 1; + } if (insn & ARM_CP_RW_BIT) { if ((insn >> 28) == 0xf) { /* WLDRW wCx */ - tmp = gen_ld32(cpu_T[1], IS_USER(s)); - tcg_gen_mov_i32(cpu_T[0], tmp); - dead_tmp(tmp); - gen_op_iwmmxt_movl_wCx_T0(wrd); + tmp = tcg_temp_new_i32(); + tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s)); + iwmmxt_store_creg(wrd, tmp); } else { i = 1; if (insn & (1 << 8)) { if (insn & (1 << 22)) { /* WLDRD */ - tcg_gen_qemu_ld64(cpu_M0, cpu_T[1], IS_USER(s)); + tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s)); i = 0; } else { /* WLDRW wRd */ - tmp = gen_ld32(cpu_T[1], IS_USER(s)); + tmp = gen_ld32(addr, IS_USER(s)); } } else { if (insn & (1 << 22)) { /* WLDRH */ - tmp = gen_ld16u(cpu_T[1], IS_USER(s)); + tmp = gen_ld16u(addr, IS_USER(s)); } else { /* WLDRB */ - tmp = gen_ld8u(cpu_T[1], IS_USER(s)); + tmp = gen_ld8u(addr, IS_USER(s)); } } if (i) { tcg_gen_extu_i32_i64(cpu_M0, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } gen_op_iwmmxt_movq_wRn_M0(wrd); } } else { if ((insn >> 28) == 0xf) { /* WSTRW wCx */ - gen_op_iwmmxt_movl_T0_wCx(wrd); - tmp = new_tmp(); - tcg_gen_mov_i32(tmp, cpu_T[0]); - gen_st32(tmp, cpu_T[1], IS_USER(s)); + tmp = iwmmxt_load_creg(wrd); + gen_st32(tmp, addr, IS_USER(s)); } else { gen_op_iwmmxt_movq_M0_wRn(wrd); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (insn & (1 << 8)) { if (insn & (1 << 22)) { /* WSTRD */ - dead_tmp(tmp); - tcg_gen_qemu_st64(cpu_M0, cpu_T[1], IS_USER(s)); + tcg_temp_free_i32(tmp); + tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s)); } else { /* WSTRW wRd */ tcg_gen_trunc_i64_i32(tmp, cpu_M0); - gen_st32(tmp, cpu_T[1], IS_USER(s)); + gen_st32(tmp, addr, IS_USER(s)); } } else { if (insn & (1 << 22)) { /* WSTRH */ tcg_gen_trunc_i64_i32(tmp, cpu_M0); - gen_st16(tmp, cpu_T[1], IS_USER(s)); + gen_st16(tmp, addr, IS_USER(s)); } else { /* WSTRB */ tcg_gen_trunc_i64_i32(tmp, cpu_M0); - gen_st8(tmp, cpu_T[1], IS_USER(s)); + gen_st8(tmp, addr, IS_USER(s)); } } } } + tcg_temp_free_i32(addr); return 0; } @@ -1622,18 +1491,19 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_op_iwmmxt_set_cup(); /* Fall through. 
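
The explicit address temp introduced above imposes a freeing discipline on every exit path; distilled from the hunk, the caller-side shape is:

    addr = tcg_temp_new_i32();
    if (gen_iwmmxt_address(s, insn, addr)) {
        tcg_temp_free_i32(addr);   /* free before signalling UNDEF */
        return 1;
    }
    /* ... emit the loads/stores through addr ... */
    tcg_temp_free_i32(addr);
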
*/ case ARM_IWMMXT_wCSSF: - gen_op_iwmmxt_movl_T0_wCx(wrd); - gen_movl_T1_reg(s, rd); - gen_op_bicl_T0_T1(); - gen_op_iwmmxt_movl_wCx_T0(wrd); + tmp = iwmmxt_load_creg(wrd); + tmp2 = load_reg(s, rd); + tcg_gen_andc_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + iwmmxt_store_creg(wrd, tmp); break; case ARM_IWMMXT_wCGR0: case ARM_IWMMXT_wCGR1: case ARM_IWMMXT_wCGR2: case ARM_IWMMXT_wCGR3: gen_op_iwmmxt_set_cup(); - gen_movl_reg_T0(s, rd); - gen_op_iwmmxt_movl_wCx_T0(wrd); + tmp = load_reg(s, rd); + iwmmxt_store_creg(wrd, tmp); break; default: return 1; @@ -1655,8 +1525,8 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) return 1; rd = (insn >> 12) & 0xf; wrd = (insn >> 16) & 0xf; - gen_op_iwmmxt_movl_T0_wCx(wrd); - gen_movl_reg_T0(s, rd); + tmp = iwmmxt_load_creg(wrd); + store_reg(s, rd, tmp); break; case 0x300: /* WANDN */ wrd = (insn >> 12) & 0xf; @@ -1833,131 +1703,145 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) rd0 = (insn >> 16) & 0xf; rd1 = (insn >> 0) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3)); - gen_op_movl_T1_im(7); - gen_op_andl_T0_T1(); - gen_op_iwmmxt_align_M0_T0_wRn(rd1); + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3)); + tcg_gen_andi_i32(tmp, tmp, 7); + iwmmxt_load_reg(cpu_V1, rd1); + gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */ + if (((insn >> 6) & 3) == 3) + return 1; rd = (insn >> 12) & 0xf; wrd = (insn >> 16) & 0xf; - gen_movl_T0_reg(s, rd); + tmp = load_reg(s, rd); gen_op_iwmmxt_movq_M0_wRn(wrd); switch ((insn >> 6) & 3) { case 0: - gen_op_movl_T1_im(0xff); - gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3); + tmp2 = tcg_const_i32(0xff); + tmp3 = tcg_const_i32((insn & 7) << 3); break; case 1: - gen_op_movl_T1_im(0xffff); - gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4); + tmp2 = tcg_const_i32(0xffff); + tmp3 = tcg_const_i32((insn & 3) << 4); break; case 2: - gen_op_movl_T1_im(0xffffffff); - gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5); + tmp2 = tcg_const_i32(0xffffffff); + tmp3 = tcg_const_i32((insn & 1) << 5); break; - case 3: - return 1; + default: + TCGV_UNUSED(tmp2); + TCGV_UNUSED(tmp3); } + gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); + tcg_temp_free(tmp3); + tcg_temp_free(tmp2); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */ rd = (insn >> 12) & 0xf; wrd = (insn >> 16) & 0xf; - if (rd == 15) + if (rd == 15 || ((insn >> 22) & 3) == 3) return 1; gen_op_iwmmxt_movq_M0_wRn(wrd); + tmp = tcg_temp_new_i32(); switch ((insn >> 22) & 3) { case 0: - if (insn & 8) - gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3); - else { - gen_op_iwmmxt_extru_T0_M0((insn & 7) << 3, 0xff); + tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + if (insn & 8) { + tcg_gen_ext8s_i32(tmp, tmp); + } else { + tcg_gen_andi_i32(tmp, tmp, 0xff); } break; case 1: - if (insn & 8) - gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4); - else { - gen_op_iwmmxt_extru_T0_M0((insn & 3) << 4, 0xffff); + tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + if (insn & 8) { + tcg_gen_ext16s_i32(tmp, tmp); + } else { + tcg_gen_andi_i32(tmp, tmp, 0xffff); } break; case 2: - gen_op_iwmmxt_extru_T0_M0((insn & 1) << 5, ~0u); + 
tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); break; - case 3: - return 1; } - gen_movl_reg_T0(s, rd); + store_reg(s, rd, tmp); break; case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */ - if ((insn & 0x000ff008) != 0x0003f000) + if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3) return 1; - gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF); + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); switch ((insn >> 22) & 3) { case 0: - gen_op_shrl_T1_im(((insn & 7) << 2) + 0); + tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0); break; case 1: - gen_op_shrl_T1_im(((insn & 3) << 3) + 4); + tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4); break; case 2: - gen_op_shrl_T1_im(((insn & 1) << 4) + 12); + tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12); break; - case 3: - return 1; } - gen_op_shll_T1_im(28); - gen_set_nzcv(cpu_T[1]); + tcg_gen_shli_i32(tmp, tmp, 28); + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp); break; case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ + if (((insn >> 6) & 3) == 3) + return 1; rd = (insn >> 12) & 0xf; wrd = (insn >> 16) & 0xf; - gen_movl_T0_reg(s, rd); + tmp = load_reg(s, rd); switch ((insn >> 6) & 3) { case 0: - gen_helper_iwmmxt_bcstb(cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_bcstb(cpu_M0, tmp); break; case 1: - gen_helper_iwmmxt_bcstw(cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_bcstw(cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_bcstl(cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_bcstl(cpu_M0, tmp); break; - case 3: - return 1; } + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */ - if ((insn & 0x000ff00f) != 0x0003f000) + if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) return 1; - gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF); + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); switch ((insn >> 22) & 3) { case 0: for (i = 0; i < 7; i ++) { - gen_op_shll_T1_im(4); - gen_op_andl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 4); + tcg_gen_and_i32(tmp, tmp, tmp2); } break; case 1: for (i = 0; i < 3; i ++) { - gen_op_shll_T1_im(8); - gen_op_andl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 8); + tcg_gen_and_i32(tmp, tmp, tmp2); } break; case 2: - gen_op_shll_T1_im(16); - gen_op_andl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_and_i32(tmp, tmp, tmp2); break; - case 3: - return 1; } - gen_set_nzcv(cpu_T[0]); + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ wrd = (insn >> 12) & 0xf; @@ -1980,51 +1864,52 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) gen_op_iwmmxt_set_mup(); break; case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */ - if ((insn & 0x000ff00f) != 0x0003f000) + if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) return 1; - gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF); + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); switch ((insn >> 22) & 3) { case 0: for (i = 0; i < 7; i ++) { - gen_op_shll_T1_im(4); - gen_op_orl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 4); + tcg_gen_or_i32(tmp, tmp, tmp2); } break; case 1: for (i = 0; i < 3; i ++) { - gen_op_shll_T1_im(8); - gen_op_orl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 8); + tcg_gen_or_i32(tmp, tmp, tmp2); } break; case 2: - gen_op_shll_T1_im(16); - gen_op_orl_T0_T1(); + tcg_gen_shli_i32(tmp2, tmp2, 16); + 
tcg_gen_or_i32(tmp, tmp, tmp2); break; - case 3: - return 1; } - gen_set_nzcv(cpu_T[0]); + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ rd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; - if ((insn & 0xf) != 0) + if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3) return 1; gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); switch ((insn >> 22) & 3) { case 0: - gen_helper_iwmmxt_msbb(cpu_T[0], cpu_M0); + gen_helper_iwmmxt_msbb(tmp, cpu_M0); break; case 1: - gen_helper_iwmmxt_msbw(cpu_T[0], cpu_M0); + gen_helper_iwmmxt_msbw(tmp, cpu_M0); break; case 2: - gen_helper_iwmmxt_msbl(cpu_T[0], cpu_M0); + gen_helper_iwmmxt_msbl(tmp, cpu_M0); break; - case 3: - return 1; } - gen_movl_reg_T0(s, rd); + store_reg(s, rd, tmp); break; case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */ case 0x906: case 0xb06: case 0xd06: case 0xf06: @@ -2122,100 +2007,120 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) break; case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */ case 0x214: case 0x614: case 0xa14: case 0xe14: + if (((insn >> 22) & 3) == 0) + return 1; wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - if (gen_iwmmxt_shift(insn, 0xff)) + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); return 1; + } switch ((insn >> 22) & 3) { - case 0: - return 1; case 1: - gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_srlw(cpu_M0, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_srll(cpu_M0, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_srlq(cpu_M0, cpu_M0, tmp); break; } + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); break; case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */ case 0x014: case 0x414: case 0x814: case 0xc14: + if (((insn >> 22) & 3) == 0) + return 1; wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - if (gen_iwmmxt_shift(insn, 0xff)) + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); return 1; + } switch ((insn >> 22) & 3) { - case 0: - return 1; case 1: - gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_sraw(cpu_M0, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_sral(cpu_M0, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_sraq(cpu_M0, cpu_M0, tmp); break; } + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); break; case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */ case 0x114: case 0x514: case 0x914: case 0xd14: + if (((insn >> 22) & 3) == 0) + return 1; wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - if (gen_iwmmxt_shift(insn, 0xff)) + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); return 1; + } switch ((insn >> 22) & 3) { - case 0: - return 1; case 1: - gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_sllw(cpu_M0, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_slll(cpu_M0, cpu_M0, 
tmp); break; case 3: - gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + gen_helper_iwmmxt_sllq(cpu_M0, cpu_M0, tmp); break; } + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); break; case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */ case 0x314: case 0x714: case 0xb14: case 0xf14: + if (((insn >> 22) & 3) == 0) + return 1; wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); switch ((insn >> 22) & 3) { - case 0: - return 1; case 1: - if (gen_iwmmxt_shift(insn, 0xf)) + if (gen_iwmmxt_shift(insn, 0xf, tmp)) { + tcg_temp_free_i32(tmp); return 1; - gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + } + gen_helper_iwmmxt_rorw(cpu_M0, cpu_M0, tmp); break; case 2: - if (gen_iwmmxt_shift(insn, 0x1f)) + if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { + tcg_temp_free_i32(tmp); return 1; - gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + } + gen_helper_iwmmxt_rorl(cpu_M0, cpu_M0, tmp); break; case 3: - if (gen_iwmmxt_shift(insn, 0x3f)) + if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { + tcg_temp_free_i32(tmp); return 1; - gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + } + gen_helper_iwmmxt_rorq(cpu_M0, cpu_M0, tmp); break; } + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2288,8 +2193,10 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) rd0 = (insn >> 16) & 0xf; rd1 = (insn >> 0) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_movl_T0_im((insn >> 20) & 3); - gen_op_iwmmxt_align_M0_T0_wRn(rd1); + tmp = tcg_const_i32((insn >> 20) & 3); + iwmmxt_load_reg(cpu_V1, rd1); + gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); + tcg_temp_free(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -2343,8 +2250,9 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f)); - gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, cpu_T[0]); + tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); + gen_helper_iwmmxt_shufh(cpu_M0, cpu_M0, tmp); + tcg_temp_free(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2396,15 +2304,13 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) case 0x408: case 0x508: case 0x608: case 0x708: case 0x808: case 0x908: case 0xa08: case 0xb08: case 0xc08: case 0xd08: case 0xe08: case 0xf08: + if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0) + return 1; wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; rd1 = (insn >> 0) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - if (!(insn & (1 << 20))) - return 1; switch ((insn >> 22) & 3) { - case 0: - return 1; case 1: if (insn & (1 << 21)) gen_op_iwmmxt_packsw_M0_wRn(rd1); @@ -2438,30 +2344,29 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) if (rd0 == 0xf || rd1 == 0xf) return 1; gen_op_iwmmxt_movq_M0_wRn(wrd); + tmp = load_reg(s, rd0); + tmp2 = load_reg(s, rd1); switch ((insn >> 16) & 0xf) { case 0x0: /* TMIA */ - gen_movl_T0_reg(s, rd0); - gen_movl_T1_reg(s, rd1); - gen_op_iwmmxt_muladdsl_M0_T0_T1(); + gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); break; case 0x8: /* TMIAPH */ - gen_movl_T0_reg(s, rd0); - gen_movl_T1_reg(s, rd1); - gen_op_iwmmxt_muladdsw_M0_T0_T1(); + gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, 
tmp, tmp2); break; case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */ - gen_movl_T1_reg(s, rd0); if (insn & (1 << 16)) - gen_op_shrl_T1_im(16); - gen_op_movl_T0_T1(); - gen_movl_T1_reg(s, rd1); + tcg_gen_shri_i32(tmp, tmp, 16); if (insn & (1 << 17)) - gen_op_shrl_T1_im(16); - gen_op_iwmmxt_muladdswl_M0_T0_T1(); + tcg_gen_shri_i32(tmp2, tmp2, 16); + gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); break; default: + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); return 1; } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -2472,11 +2377,12 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn) return 0; } -/* Disassemble an XScale DSP instruction. Returns nonzero if an error occured +/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) { int acc, rd0, rd1, rdhi, rdlo; + TCGv tmp, tmp2; if ((insn & 0x0ff00f10) == 0x0e200010) { /* Multiply with Internal Accumulate Format */ @@ -2487,33 +2393,30 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) if (acc != 0) return 1; + tmp = load_reg(s, rd0); + tmp2 = load_reg(s, rd1); switch ((insn >> 16) & 0xf) { case 0x0: /* MIA */ - gen_movl_T0_reg(s, rd0); - gen_movl_T1_reg(s, rd1); - gen_op_iwmmxt_muladdsl_M0_T0_T1(); + gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); break; case 0x8: /* MIAPH */ - gen_movl_T0_reg(s, rd0); - gen_movl_T1_reg(s, rd1); - gen_op_iwmmxt_muladdsw_M0_T0_T1(); + gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); break; case 0xc: /* MIABB */ case 0xd: /* MIABT */ case 0xe: /* MIATB */ case 0xf: /* MIATT */ - gen_movl_T1_reg(s, rd0); if (insn & (1 << 16)) - gen_op_shrl_T1_im(16); - gen_op_movl_T0_T1(); - gen_movl_T1_reg(s, rd1); + tcg_gen_shri_i32(tmp, tmp, 16); if (insn & (1 << 17)) - gen_op_shrl_T1_im(16); - gen_op_iwmmxt_muladdswl_M0_T0_T1(); + tcg_gen_shri_i32(tmp2, tmp2, 16); + gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); break; default: return 1; } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(acc); return 0; @@ -2529,15 +2432,14 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) return 1; if (insn & ARM_CP_RW_BIT) { /* MRA */ - gen_iwmmxt_movl_T0_T1_wRn(acc); - gen_movl_reg_T0(s, rdlo); - gen_op_movl_T0_im((1 << (40 - 32)) - 1); - gen_op_andl_T0_T1(); - gen_movl_reg_T0(s, rdhi); + iwmmxt_load_reg(cpu_V0, acc); + tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); + tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); } else { /* MAR */ - gen_movl_T0_reg(s, rdlo); - gen_movl_T1_reg(s, rdhi); - gen_iwmmxt_movl_wRn_T0_T1(acc); + tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); + iwmmxt_store_reg(cpu_V0, acc); } return 0; } @@ -2549,27 +2451,28 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn) instruction is not defined. 
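
The MRA path above splits the 40-bit internal accumulator across two core registers, masking the high word down to bits 39:32. A standalone sketch of that arithmetic (the accumulator value is chosen arbitrarily for illustration):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t acc  = 0xab89abcdefULL;  /* 40-bit accumulator value */
        uint32_t rdlo = (uint32_t)acc;                                  /* 0x89abcdef */
        uint32_t rdhi = (uint32_t)(acc >> 32) & ((1 << (40 - 32)) - 1); /* 0xab */
        assert(rdlo == 0x89abcdefu && rdhi == 0xabu);
        return 0;
    }
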
*/ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn) { - TCGv tmp; + TCGv tmp, tmp2; uint32_t rd = (insn >> 12) & 0xf; uint32_t cp = (insn >> 8) & 0xf; - if (IS_USER(s)) { - return 1; - } if (insn & ARM_CP_RW_BIT) { if (!env->cp[cp].cp_read) return 1; gen_set_pc_im(s->pc); - tmp = new_tmp(); - gen_helper_get_cp(tmp, cpu_env, tcg_const_i32(insn)); + tmp = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(insn); + gen_helper_get_cp(tmp, cpu_env, tmp2); + tcg_temp_free(tmp2); store_reg(s, rd, tmp); } else { if (!env->cp[cp].cp_write) return 1; gen_set_pc_im(s->pc); tmp = load_reg(s, rd); - gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp); - dead_tmp(tmp); + tmp2 = tcg_const_i32(insn); + gen_helper_set_cp(cpu_env, tmp2, tmp); + tcg_temp_free(tmp2); + tcg_temp_free_i32(tmp); } return 0; } @@ -2594,12 +2497,61 @@ static int cp15_user_ok(uint32_t insn) return 0; } +static int cp15_tls_load_store(CPUState *env, DisasContext *s, uint32_t insn, uint32_t rd) +{ + TCGv tmp; + int cpn = (insn >> 16) & 0xf; + int cpm = insn & 0xf; + int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38); + + if (!arm_feature(env, ARM_FEATURE_V6K)) + return 0; + + if (!(cpn == 13 && cpm == 0)) + return 0; + + if (insn & ARM_CP_RW_BIT) { + switch (op) { + case 2: + tmp = load_cpu_field(cp15.c13_tls1); + break; + case 3: + tmp = load_cpu_field(cp15.c13_tls2); + break; + case 4: + tmp = load_cpu_field(cp15.c13_tls3); + break; + default: + return 0; + } + store_reg(s, rd, tmp); + + } else { + tmp = load_reg(s, rd); + switch (op) { + case 2: + store_cpu_field(tmp, cp15.c13_tls1); + break; + case 3: + store_cpu_field(tmp, cp15.c13_tls2); + break; + case 4: + store_cpu_field(tmp, cp15.c13_tls3); + break; + default: + tcg_temp_free_i32(tmp); + return 0; + } + } + return 1; +} + /* Disassemble system coprocessor (cp15) instruction. Return nonzero if instruction is not defined. */ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) { uint32_t rd; - TCGv tmp; + TCGv tmp, tmp2; /* M profile cores use memory mapped registers instead of cp15. */ if (arm_feature(env, ARM_FEATURE_M)) @@ -2620,26 +2572,56 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) if (IS_USER(s) && !cp15_user_ok(insn)) { return 1; } - if ((insn & 0x0fff0fff) == 0x0e070f90 - || (insn & 0x0fff0fff) == 0x0e070f58) { - /* Wait for interrupt. */ - gen_set_pc_im(s->pc); - s->is_jmp = DISAS_WFI; + + /* Pre-v7 versions of the architecture implemented WFI via coprocessor + * instructions rather than a separate instruction. + */ + if ((insn & 0x0fff0fff) == 0x0e070f90) { + /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores). + * In v7, this must NOP. + */ + if (!arm_feature(env, ARM_FEATURE_V7)) { + /* Wait for interrupt. */ + gen_set_pc_im(s->pc); + s->is_jmp = DISAS_WFI; + } return 0; } + + if ((insn & 0x0fff0fff) == 0x0e070f58) { + /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI, + * so this is slightly over-broad. + */ + if (!arm_feature(env, ARM_FEATURE_V6)) { + /* Wait for interrupt. */ + gen_set_pc_im(s->pc); + s->is_jmp = DISAS_WFI; + return 0; + } + /* Otherwise fall through to handle via helper function. + * In particular, on v7 and some v6 cores this is one of + * the VA-PA registers. 
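
cp15_tls_load_store() above fast-paths the v6K thread-ID registers instead of round-tripping through the generic cp15 helpers. A decode sketch (the architectural register names are an editorial gloss for orientation, not from the patch):

    /* op == opc2 | (opc1 << 3), so with opc1 == 0:
     *   mrc p15, 0, rN, c13, c0, 2   -> op 2 -> cp15.c13_tls1 (TPIDRURW)
     *   mrc p15, 0, rN, c13, c0, 3   -> op 3 -> cp15.c13_tls2 (TPIDRURO)
     *   mrc p15, 0, rN, c13, c0, 4   -> op 4 -> cp15.c13_tls3 (TPIDRPRW)
     * anything else returns 0 and falls back to the generic helper.
     */
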
+ */ + } + rd = (insn >> 12) & 0xf; + + if (cp15_tls_load_store(env, s, insn, rd)) + return 0; + + tmp2 = tcg_const_i32(insn); if (insn & ARM_CP_RW_BIT) { - tmp = new_tmp(); - gen_helper_get_cp15(tmp, cpu_env, tcg_const_i32(insn)); + tmp = tcg_temp_new_i32(); + gen_helper_get_cp15(tmp, cpu_env, tmp2); /* If the destination register is r15 then sets condition codes. */ if (rd != 15) store_reg(s, rd, tmp); else - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { tmp = load_reg(s, rd); - gen_helper_set_cp15(cpu_env, tcg_const_i32(insn), tmp); - dead_tmp(tmp); + gen_helper_set_cp15(cpu_env, tmp2, tmp); + tcg_temp_free_i32(tmp); /* Normally we would always end the TB here, but Linux * arch/arm/mach-pxa/sleep.S expects two instructions following * an MMU enable to execute from cache. Imitate this behaviour. */ @@ -2647,6 +2629,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) (insn & 0x0fff0fff) != 0x0e010f10) gen_lookup_tb(s); } + tcg_temp_free_i32(tmp2); return 0; } @@ -2673,7 +2656,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn) /* Move between integer and VFP cores. */ static TCGv gen_vfp_mrs(void) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp, cpu_F0s); return tmp; } @@ -2681,18 +2664,12 @@ static TCGv gen_vfp_mrs(void) static void gen_vfp_msr(TCGv tmp) { tcg_gen_mov_i32(cpu_F0s, tmp); - dead_tmp(tmp); -} - -static inline int -vfp_enabled(CPUState * env) -{ - return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_u8(TCGv var, int shift) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); if (shift) tcg_gen_shri_i32(var, var, shift); tcg_gen_ext8u_i32(var, var); @@ -2700,40 +2677,63 @@ static void gen_neon_dup_u8(TCGv var, int shift) tcg_gen_or_i32(var, var, tmp); tcg_gen_shli_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_low16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_ext16u_i32(var, var); tcg_gen_shli_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } static void gen_neon_dup_high16(TCGv var) { - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_andi_i32(var, var, 0xffff0000); tcg_gen_shri_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } -/* Disassemble a VFP instruction. Returns nonzero if an error occured +static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size) +{ + /* Load a single Neon element and replicate into a 32 bit TCG reg */ + TCGv tmp; + switch (size) { + case 0: + tmp = gen_ld8u(addr, IS_USER(s)); + gen_neon_dup_u8(tmp, 0); + break; + case 1: + tmp = gen_ld16u(addr, IS_USER(s)); + gen_neon_dup_low16(tmp); + break; + case 2: + tmp = gen_ld32(addr, IS_USER(s)); + break; + default: /* Avoid compiler warnings. */ + abort(); + } + return tmp; +} + +/* Disassemble a VFP instruction. Returns nonzero if an error occurred (ie. an undefined instruction). */ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) { uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask; int dp, veclen; + TCGv addr; TCGv tmp; TCGv tmp2; if (!arm_feature(env, ARM_FEATURE_VFP)) return 1; - if (!vfp_enabled(env)) { + if (!s->vfp_enabled) { /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. 
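
The byte-duplication helper above fans one byte out to all four lanes of a 32-bit value. Its dataflow, evaluated on plain integers as a standalone check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t var = 0x12ab0000, shift = 16;
        var = (var >> shift) & 0xff;  /* shri + ext8u -> 0x000000ab */
        var |= var << 8;              /* shli 8, or   -> 0x0000abab */
        var |= var << 16;             /* shli 16, or  -> 0xabababab */
        assert(var == 0xabababab);
        return 0;
    }
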
*/ if ((insn & 0x0fe00fff) != 0x0ee00a10) return 1; @@ -2812,7 +2812,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_neon_dup_low16(tmp); } for (n = 0; n <= pass * 2; n++) { - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_mov_i32(tmp2, tmp); neon_store_reg(rn, n, tmp2); } @@ -2823,12 +2823,12 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: tmp2 = neon_load_reg(rn, pass); gen_bfi(tmp, tmp2, tmp, offset, 0xff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); break; case 1: tmp2 = neon_load_reg(rn, pass); gen_bfi(tmp, tmp2, tmp, offset, 0xffff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); break; case 2: break; @@ -2874,7 +2874,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); tcg_gen_andi_i32(tmp, tmp, 0xf0000000); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_vfp_get_fpscr(tmp, cpu_env); } break; @@ -2895,7 +2895,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) if (rd == 15) { /* Set the 4 flag bits in the CPSR. */ gen_set_nzcv(tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { store_reg(s, rd, tmp); } @@ -2913,12 +2913,15 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case ARM_VFP_FPSCR: gen_helper_vfp_set_fpscr(cpu_env, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_lookup_tb(s); break; case ARM_VFP_FPEXC: if (IS_USER(s)) return 1; + /* TODO: VFP subarchitecture support. + * For now, keep the EN bit only */ + tcg_gen_andi_i32(tmp, tmp, 1 << 30); store_cpu_field(tmp, vfp.xregs[rn]); gen_lookup_tb(s); break; @@ -2948,16 +2951,18 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); } - if (op == 15 && (rn == 15 || rn > 17)) { + if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) { /* Integer or single precision destination. */ rd = VFP_SREG_D(insn); } else { VFP_DREG_D(rd, insn); } - - if (op == 15 && (rn == 16 || rn == 17)) { - /* Integer source. */ - rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1); + if (op == 15 && + (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) { + /* VCVT from int is always from S reg regardless of dp bit. + * VCVT with immediate frac_bits has same format as SREG_M + */ + rm = VFP_SREG_M(insn); } else { VFP_DREG_M(rm, insn); } @@ -2969,10 +2974,13 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) } else { rd = VFP_SREG_D(insn); } + /* NB that we implicitly rely on the encoding for the frac_bits + * in VCVT of fixed to float being the same as that of an SREG_M + */ rm = VFP_SREG_M(insn); } - veclen = env->vfp.vec_len; + veclen = s->vec_len; if (op == 15 && rn > 3) veclen = 0; @@ -2993,9 +3001,9 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) veclen = 0; } else { if (dp) - delta_d = (env->vfp.vec_stride >> 1) + 1; + delta_d = (s->vec_stride >> 1) + 1; else - delta_d = env->vfp.vec_stride + 1; + delta_d = s->vec_stride + 1; if ((rm & bank_mask) == 0) { /* mixed scalar/vector */ @@ -3052,27 +3060,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) for (;;) { /* Perform the calculation. 
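
The hunk below rewrites the VFP multiply-accumulate cases so the final addition keeps its architectural operand order; as the in-line comments there note, folding (-A + B) into (B - A) looks harmless but changes results when an input is a NaN. A standalone sketch of the effect (on typical hardware the two orders differ in the propagated NaN's sign bit, since negation flips it before propagation, but NaN bit patterns are implementation-defined and may vary by platform):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint32_t a_bits = 0xffc00001u;  /* a negative quiet NaN */
        float a, b = 1.0f;
        memcpy(&a, &a_bits, sizeof a);
        float r1 = b - a;   /* subtraction propagates a as-is */
        float r2 = -a + b;  /* negation flips a's sign bit first */
        uint32_t r1b, r2b;
        memcpy(&r1b, &r1, sizeof r1b);
        memcpy(&r2b, &r2, sizeof r2b);
        printf("b - a  = %08x\n-a + b = %08x\n", r1b, r2b);
        return 0;
    }
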
*/ switch (op) { - case 0: /* mac: fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); + case 0: /* VMLA: fd + (fn * fm) */ + /* Note that order of inputs to the add matters for NaNs */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 1: /* nmac: fd - (fn * fm) */ + case 1: /* VMLS: fd + -(fn * fm) */ gen_vfp_mul(dp); - gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_add(dp); break; - case 2: /* msc: -fd + (fn * fm) */ - gen_vfp_mul(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + case 2: /* VNMLS: -fd + (fn * fm) */ + /* Note that it isn't valid to replace (-A + B) with (B - A) + * or similar plausible looking simplifications + * because this will give wrong results for NaNs. + */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_neg(dp); + gen_vfp_add(dp); break; - case 3: /* nmsc: -fd - (fn * fm) */ + case 3: /* VNMLA: -fd + -(fn * fm) */ gen_vfp_mul(dp); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); gen_vfp_neg(dp); - gen_mov_F1_vreg(dp, rd); - gen_vfp_sub(dp); + gen_vfp_add(dp); break; case 4: /* mul: fn * fm */ gen_vfp_mul(dp); @@ -3126,6 +3141,47 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) case 3: /* sqrt */ gen_vfp_sqrt(dp); break; + case 4: /* vcvtb.f32.f16 */ + if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) + return 1; + tmp = gen_vfp_mrs(); + tcg_gen_ext16u_i32(tmp, tmp); + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + tcg_temp_free_i32(tmp); + break; + case 5: /* vcvtt.f32.f16 */ + if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) + return 1; + tmp = gen_vfp_mrs(); + tcg_gen_shri_i32(tmp, tmp, 16); + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + tcg_temp_free_i32(tmp); + break; + case 6: /* vcvtb.f16.f32 */ + if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) + return 1; + tmp = tcg_temp_new_i32(); + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + gen_mov_F0_vreg(0, rd); + tmp2 = gen_vfp_mrs(); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + gen_vfp_msr(tmp); + break; + case 7: /* vcvtt.f16.f32 */ + if (!arm_feature(env, ARM_FEATURE_VFP_FP16)) + return 1; + tmp = tcg_temp_new_i32(); + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + tcg_gen_shli_i32(tmp, tmp, 16); + gen_mov_F0_vreg(0, rd); + tmp2 = gen_vfp_mrs(); + tcg_gen_ext16u_i32(tmp2, tmp2); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + gen_vfp_msr(tmp); + break; case 8: /* cmp */ gen_vfp_cmp(dp); break; @@ -3146,62 +3202,62 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env); break; case 16: /* fuito */ - gen_vfp_uito(dp); + gen_vfp_uito(dp, 0); break; case 17: /* fsito */ - gen_vfp_sito(dp); + gen_vfp_sito(dp, 0); break; case 20: /* fshto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_shto(dp, 16 - rm); + gen_vfp_shto(dp, 16 - rm, 0); break; case 21: /* fslto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_slto(dp, 32 - rm); + gen_vfp_slto(dp, 32 - rm, 0); break; case 22: /* fuhto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_uhto(dp, 16 - rm); + gen_vfp_uhto(dp, 16 - rm, 0); break; case 23: /* fulto */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_ulto(dp, 32 - rm); + gen_vfp_ulto(dp, 32 - rm, 0); break; case 24: /* ftoui */ - gen_vfp_toui(dp); + gen_vfp_toui(dp, 0); break; case 25: /* ftouiz */ - gen_vfp_touiz(dp); + gen_vfp_touiz(dp, 0); 
break; case 26: /* ftosi */ - gen_vfp_tosi(dp); + gen_vfp_tosi(dp, 0); break; case 27: /* ftosiz */ - gen_vfp_tosiz(dp); + gen_vfp_tosiz(dp, 0); break; case 28: /* ftosh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosh(dp, 16 - rm); + gen_vfp_tosh(dp, 16 - rm, 0); break; case 29: /* ftosl */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_tosl(dp, 32 - rm); + gen_vfp_tosl(dp, 32 - rm, 0); break; case 30: /* ftouh */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_touh(dp, 16 - rm); + gen_vfp_touh(dp, 16 - rm, 0); break; case 31: /* ftoul */ if (!arm_feature(env, ARM_FEATURE_VFP3)) return 1; - gen_vfp_toul(dp, 32 - rm); + gen_vfp_toul(dp, 32 - rm, 0); break; default: /* undefined */ printf ("rn:%d\n", rn); @@ -3216,8 +3272,8 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Write back the result. */ if (op == 15 && (rn >= 8 && rn <= 11)) ; /* Comparison, do nothing. */ - else if (op == 15 && rn > 17) - /* Integer result. */ + else if (op == 15 && dp && ((rn & 0x1c) == 0x18)) + /* VCVT double to int: always integer result. */ gen_mov_vreg_F0(0, rd); else if (op == 15 && rn == 15) /* conversion */ @@ -3264,7 +3320,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) break; case 0xc: case 0xd: - if (dp && (insn & 0x03e00000) == 0x00400000) { + if ((insn & 0x03e00000) == 0x00400000) { /* two-register transfer */ rn = (insn >> 16) & 0xf; rd = (insn >> 12) & 0xf; @@ -3286,10 +3342,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) } else { gen_mov_F0_vreg(0, rm); tmp = gen_vfp_mrs(); - store_reg(s, rn, tmp); + store_reg(s, rd, tmp); gen_mov_F0_vreg(0, rm + 1); tmp = gen_vfp_mrs(); - store_reg(s, rd, tmp); + store_reg(s, rn, tmp); } } else { /* arm->vfp */ @@ -3301,10 +3357,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm * 2 + 1); } else { - tmp = load_reg(s, rn); + tmp = load_reg(s, rd); gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm); - tmp = load_reg(s, rd); + tmp = load_reg(s, rn); gen_vfp_msr(tmp); gen_mov_vreg_F0(0, rm + 1); } @@ -3317,23 +3373,25 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) else rd = VFP_SREG_D(insn); if (s->thumb && rn == 15) { - gen_op_movl_T1_im(s->pc & ~2); + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, s->pc & ~2); } else { - gen_movl_T1_reg(s, rn); + addr = load_reg(s, rn); } if ((insn & 0x01200000) == 0x01000000) { /* Single load/store */ offset = (insn & 0xff) << 2; if ((insn & (1 << 23)) == 0) offset = -offset; - gen_op_addl_T1_im(offset); + tcg_gen_addi_i32(addr, addr, offset); if (insn & (1 << 20)) { - gen_vfp_ld(s, dp); + gen_vfp_ld(s, dp, addr); gen_mov_vreg_F0(dp, rd); } else { gen_mov_F0_vreg(dp, rd); - gen_vfp_st(s, dp); + gen_vfp_st(s, dp, addr); } + tcg_temp_free_i32(addr); } else { /* load/store multiple */ if (dp) @@ -3342,24 +3400,26 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) n = insn & 0xff; if (insn & (1 << 24)) /* pre-decrement */ - gen_op_addl_T1_im(-((insn & 0xff) << 2)); + tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2)); if (dp) offset = 8; else offset = 4; + tmp = tcg_const_i32(offset); for (i = 0; i < n; i++) { if (insn & ARM_CP_RW_BIT) { /* load */ - gen_vfp_ld(s, dp); + gen_vfp_ld(s, dp, addr); gen_mov_vreg_F0(dp, rd + i); } else { /* store */ gen_mov_F0_vreg(dp, rd + i); - gen_vfp_st(s, dp); + gen_vfp_st(s, dp, addr); } - gen_op_addl_T1_im(offset); + tcg_gen_add_i32(addr, 
addr, tmp); } + tcg_temp_free_i32(tmp); if (insn & (1 << 21)) { /* writeback */ if (insn & (1 << 24)) @@ -3370,8 +3430,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) offset = 0; if (offset != 0) - gen_op_addl_T1_im(offset); - gen_movl_reg_T1(s, rn); + tcg_gen_addi_i32(addr, addr, offset); + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); } } } @@ -3391,7 +3453,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest) if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { tcg_gen_goto_tb(n); gen_set_pc_im(dest); - tcg_gen_exit_tb((long)tb + n); + tcg_gen_exit_tb((tcg_target_long)tb + n); } else { gen_set_pc_im(dest); tcg_gen_exit_tb(0); @@ -3440,6 +3502,10 @@ static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) { /* Mask out undefined bits. */ mask &= ~CPSR_RESERVED; + if (!arm_feature(env, ARM_FEATURE_V4T)) + mask &= ~CPSR_T; + if (!arm_feature(env, ARM_FEATURE_V5)) + mask &= ~CPSR_Q; /* V5TE in reality*/ if (!arm_feature(env, ARM_FEATURE_V6)) mask &= ~(CPSR_E | CPSR_GE); if (!arm_feature(env, ARM_FEATURE_THUMB2)) @@ -3453,8 +3519,8 @@ static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) { return mask; } -/* Returns nonzero if access to the PSR is not permitted. */ -static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr) +/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */ +static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0) { TCGv tmp; if (spsr) { @@ -3464,16 +3530,26 @@ static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr) tmp = load_cpu_field(spsr); tcg_gen_andi_i32(tmp, tmp, ~mask); - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask); - tcg_gen_or_i32(tmp, tmp, cpu_T[0]); + tcg_gen_andi_i32(t0, t0, mask); + tcg_gen_or_i32(tmp, tmp, t0); store_cpu_field(tmp, spsr); } else { - gen_set_cpsr(cpu_T[0], mask); + gen_set_cpsr(t0, mask); } + tcg_temp_free_i32(t0); gen_lookup_tb(s); return 0; } +/* Returns nonzero if access to the PSR is not permitted. */ +static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val) +{ + TCGv tmp; + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + return gen_set_psr(s, mask, spsr, tmp); +} + /* Generate an old-style exception return. Marks pc as dead. 
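
gen_set_psr_im() above packages the common "move an immediate into the PSR" case. A sketch of the intended caller shape (the decode context and the illegal_op label are assumptions about the surrounding translate.c code, not part of this hunk):

    /* MSR (immediate): write val into the CPSR fields selected by mask. */
    if (gen_set_psr_im(s, msr_mask(env, s, (insn >> 16) & 0xf, 0), 0, val)) {
        goto illegal_op;   /* PSR access not permitted in this mode */
    }
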
*/ static void gen_exception_return(DisasContext *s, TCGv pc) { @@ -3481,7 +3557,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc) store_reg(s, 15, pc); tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, 0xffffffff); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); s->is_jmp = DISAS_UPDATE; } @@ -3489,7 +3565,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc) static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr) { gen_set_cpsr(cpsr, 0xffffffff); - dead_tmp(cpsr); + tcg_temp_free_i32(cpsr); store_reg(s, 15, pc); s->is_jmp = DISAS_UPDATE; } @@ -3499,15 +3575,18 @@ gen_set_condexec (DisasContext *s) { if (s->condexec_mask) { uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); - TCGv tmp = new_tmp(); + TCGv tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_cpu_field(tmp, condexec_bits); } - else if (s->condexec_mask_prev != 0) { - TCGv tmp = new_tmp(); - tcg_gen_movi_i32(tmp, 0); - store_cpu_field(tmp, condexec_bits); - } +} + +static void gen_exception_insn(DisasContext *s, int offset, int excp) +{ + gen_set_condexec(s); + gen_set_pc_im(s->pc - offset); + gen_exception(excp); + s->is_jmp = DISAS_JUMP; } static void gen_nop_hint(DisasContext *s, int val) @@ -3525,31 +3604,24 @@ static void gen_nop_hint(DisasContext *s, int val) } } -/* These macros help make the code more readable when migrating from the - old dyngen helpers. They should probably be removed when - T0/T1 are removed. */ -#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1] -#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1] - #define CPU_V001 cpu_V0, cpu_V0, cpu_V1 -static inline int gen_neon_add(int size) +static inline void gen_neon_add(int size, TCGv t0, TCGv t1) { switch (size) { - case 0: gen_helper_neon_add_u8(CPU_T001); break; - case 1: gen_helper_neon_add_u16(CPU_T001); break; - case 2: gen_op_addl_T0_T1(); break; - default: return 1; + case 0: gen_helper_neon_add_u8(t0, t0, t1); break; + case 1: gen_helper_neon_add_u16(t0, t0, t1); break; + case 2: tcg_gen_add_i32(t0, t0, t1); break; + default: abort(); } - return 0; } -static inline void gen_neon_rsb(int size) +static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) { switch (size) { - case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break; - case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break; - case 2: gen_op_rsbl_T0_T1(); break; + case 0: gen_helper_neon_sub_u8(t0, t1, t0); break; + case 1: gen_helper_neon_sub_u16(t0, t1, t0); break; + case 2: tcg_gen_sub_i32(t0, t1, t0); break; default: return; } } @@ -3560,125 +3632,178 @@ static inline void gen_neon_rsb(int size) #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 -/* FIXME: This is wrong. They set the wrong overflow bit. 
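
The rewritten GEN_NEON_INTEGER_OP now expands against the local tmp/tmp2 temps rather than the retired cpu_T globals. For example, GEN_NEON_INTEGER_OP(pmin) with size == 2 and u == 0 selects case 4 and, via the #define above, emits the single call:

    gen_helper_neon_min_s32(tmp, tmp, tmp2);
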
*/ -#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) -#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) -#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) -#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) - -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \ - break; \ - default: return 1; \ - }} while (0) - #define GEN_NEON_INTEGER_OP(name) do { \ switch ((size << 1) | u) { \ case 0: \ - gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \ break; \ case 1: \ - gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \ break; \ case 2: \ - gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \ break; \ case 3: \ - gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \ break; \ case 4: \ - gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \ break; \ case 5: \ - gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \ + gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \ break; \ default: return 1; \ }} while (0) -static inline void -gen_neon_movl_scratch_T0(int scratch) +static TCGv neon_load_scratch(int scratch) { - uint32_t offset; - - offset = offsetof(CPUARMState, vfp.scratch[scratch]); - tcg_gen_st_i32(cpu_T[0], cpu_env, offset); + TCGv tmp = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); + return tmp; } -static inline void -gen_neon_movl_scratch_T1(int scratch) +static void neon_store_scratch(int scratch, TCGv var) { - uint32_t offset; - - offset = offsetof(CPUARMState, vfp.scratch[scratch]); - tcg_gen_st_i32(cpu_T[1], cpu_env, offset); + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); + tcg_temp_free_i32(var); } -static inline void -gen_neon_movl_T0_scratch(int scratch) +static inline TCGv neon_get_scalar(int size, int reg) { - uint32_t offset; - - offset = offsetof(CPUARMState, vfp.scratch[scratch]); - tcg_gen_ld_i32(cpu_T[0], cpu_env, offset); + TCGv tmp; + if (size == 1) { + tmp = neon_load_reg(reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tmp); + } else { + gen_neon_dup_low16(tmp); + } + } else { + tmp = neon_load_reg(reg & 15, reg >> 4); + } + return tmp; } -static inline void -gen_neon_movl_T1_scratch(int scratch) +static int gen_neon_unzip(int rd, int rm, int size, int q) { - uint32_t offset; - - offset = offsetof(CPUARMState, vfp.scratch[scratch]); - tcg_gen_ld_i32(cpu_T[1], cpu_env, offset); + TCGv tmp, tmp2; + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + switch (size) { + case 0: + gen_helper_neon_qunzip8(tmp, tmp2); + break; + case 1: + 
gen_helper_neon_qunzip16(tmp, tmp2); + break; + case 2: + gen_helper_neon_qunzip32(tmp, tmp2); + break; + default: + abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_unzip8(tmp, tmp2); + break; + case 1: + gen_helper_neon_unzip16(tmp, tmp2); + break; + default: + abort(); + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; } -static inline void gen_neon_get_scalar(int size, int reg) +static int gen_neon_zip(int rd, int rm, int size, int q) { - if (size == 1) { - NEON_GET_REG(T0, reg >> 1, reg & 1); + TCGv tmp, tmp2; + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + switch (size) { + case 0: + gen_helper_neon_qzip8(tmp, tmp2); + break; + case 1: + gen_helper_neon_qzip16(tmp, tmp2); + break; + case 2: + gen_helper_neon_qzip32(tmp, tmp2); + break; + default: + abort(); + } } else { - NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1); - if (reg & 1) - gen_neon_dup_low16(cpu_T[0]); - else - gen_neon_dup_high16(cpu_T[0]); + switch (size) { + case 0: + gen_helper_neon_zip8(tmp, tmp2); + break; + case 1: + gen_helper_neon_zip16(tmp, tmp2); + break; + default: + abort(); + } } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; } -static void gen_neon_unzip(int reg, int q, int tmp, int size) +static void gen_neon_trn_u8(TCGv t0, TCGv t1) { - int n; + TCGv rd, tmp; - for (n = 0; n < q + 1; n += 2) { - NEON_GET_REG(T0, reg, n); - NEON_GET_REG(T0, reg, n + n); - switch (size) { - case 0: gen_helper_neon_unzip_u8(); break; - case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */ - case 2: /* no-op */; break; - default: abort(); - } - gen_neon_movl_scratch_T0(tmp + n); - gen_neon_movl_scratch_T1(tmp + n + 1); - } + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + + tcg_gen_shli_i32(rd, t0, 8); + tcg_gen_andi_i32(rd, rd, 0xff00ff00); + tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); + tcg_gen_or_i32(rd, rd, tmp); + + tcg_gen_shri_i32(t1, t1, 8); + tcg_gen_andi_i32(t1, t1, 0x00ff00ff); + tcg_gen_andi_i32(tmp, t0, 0xff00ff00); + tcg_gen_or_i32(t1, t1, tmp); + tcg_gen_mov_i32(t0, rd); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); } +static void gen_neon_trn_u16(TCGv t0, TCGv t1) +{ + TCGv rd, tmp; + + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + + tcg_gen_shli_i32(rd, t0, 16); + tcg_gen_andi_i32(tmp, t1, 0xffff); + tcg_gen_or_i32(rd, rd, tmp); + tcg_gen_shri_i32(t1, t1, 16); + tcg_gen_andi_i32(tmp, t0, 0xffff0000); + tcg_gen_or_i32(t1, t1, tmp); + tcg_gen_mov_i32(t0, rd); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); +} + + static struct { int nregs; int interleave; @@ -3705,17 +3830,18 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) int op; int nregs; int interleave; + int spacing; int stride; int size; int reg; int pass; int load; int shift; - int n; + TCGv addr; TCGv tmp; TCGv tmp2; - if (!vfp_enabled(env)) + if (!s->vfp_enabled) return 1; VFP_DREG_D(rd, insn); rn = (insn >> 16) & 0xf; @@ -3725,119 +3851,86 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Load store all elements. 
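
A byte-level trace of gen_neon_trn_u8() above, worked out from its shift/mask sequence (braces list bytes most-significant first):

    /* in:  t0 = {a3,a2,a1,a0}   t1 = {b3,b2,b1,b0}
     * out: t0 = {a2,b2,a0,b0}   t1 = {a3,b3,a1,b1}
     *
     * t0 collects the even-numbered bytes of both inputs and t1 the
     * odd-numbered ones, pairwise interleaved -- the per-lane
     * transposition step that VTRN.8 performs.
     */
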
*/ op = (insn >> 8) & 0xf; size = (insn >> 6) & 3; - if (op > 10 || size == 3) + if (op > 10) return 1; + /* Catch UNDEF cases for bad values of align field */ + switch (op & 0xc) { + case 4: + if (((insn >> 5) & 1) == 1) { + return 1; + } + break; + case 8: + if (((insn >> 4) & 3) == 3) { + return 1; + } + break; + default: + break; + } nregs = neon_ls_element_type[op].nregs; interleave = neon_ls_element_type[op].interleave; - gen_movl_T1_reg(s, rn); - stride = (1 << size) * interleave; - for (reg = 0; reg < nregs; reg++) { - if (interleave > 2 || (interleave == 2 && nregs == 2)) { - gen_movl_T1_reg(s, rn); - gen_op_addl_T1_im((1 << size) * reg); - } else if (interleave == 2 && nregs == 4 && reg == 2) { - gen_movl_T1_reg(s, rn); - gen_op_addl_T1_im(1 << size); - } - for (pass = 0; pass < 2; pass++) { - if (size == 2) { - if (load) { - tmp = gen_ld32(cpu_T[1], IS_USER(s)); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - gen_st32(tmp, cpu_T[1], IS_USER(s)); - } - gen_op_addl_T1_im(stride); - } else if (size == 1) { - if (load) { - tmp = gen_ld16u(cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - tmp2 = gen_ld16u(cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - gen_bfi(tmp, tmp, tmp2, 16, 0xffff); - dead_tmp(tmp2); - neon_store_reg(rd, pass, tmp); - } else { - tmp = neon_load_reg(rd, pass); - tmp2 = new_tmp(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_st16(tmp, cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - gen_st16(tmp2, cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - } - } else /* size == 0 */ { - if (load) { - TCGV_UNUSED(tmp2); - for (n = 0; n < 4; n++) { - tmp = gen_ld8u(cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - if (n == 0) { - tmp2 = tmp; - } else { - gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff); - dead_tmp(tmp); - } - } - neon_store_reg(rd, pass, tmp2); - } else { - tmp2 = neon_load_reg(rd, pass); - for (n = 0; n < 4; n++) { - tmp = new_tmp(); - if (n == 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_st8(tmp, cpu_T[1], IS_USER(s)); - gen_op_addl_T1_im(stride); - } - dead_tmp(tmp2); - } - } - } - rd += neon_ls_element_type[op].spacing; + spacing = neon_ls_element_type[op].spacing; + if (size == 3 && (interleave | spacing) != 1) { + return 1; } + addr = tcg_const_i32(insn); + gen_helper_neon_vldst_all(addr); + tcg_temp_free_i32(addr); stride = nregs * 8; } else { size = (insn >> 10) & 3; if (size == 3) { /* Load single element to all lanes. */ - if (!load) + int a = (insn >> 4) & 1; + if (!load) { return 1; + } size = (insn >> 6) & 3; nregs = ((insn >> 8) & 3) + 1; - stride = (insn & (1 << 5)) ? 2 : 1; - gen_movl_T1_reg(s, rn); - for (reg = 0; reg < nregs; reg++) { - switch (size) { - case 0: - tmp = gen_ld8u(cpu_T[1], IS_USER(s)); - gen_neon_dup_u8(tmp, 0); - break; - case 1: - tmp = gen_ld16u(cpu_T[1], IS_USER(s)); - gen_neon_dup_low16(tmp); - break; - case 2: - tmp = gen_ld32(cpu_T[1], IS_USER(s)); - break; - case 3: + + if (size == 3) { + if (nregs != 4 || a == 0) { return 1; - default: /* Avoid compiler warnings. 
*/ - abort(); } - gen_op_addl_T1_im(1 << size); - tmp2 = new_tmp(); - tcg_gen_mov_i32(tmp2, tmp); - neon_store_reg(rd, 0, tmp2); - neon_store_reg(rd, 1, tmp); - rd += stride; + /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a == 1 && size == 0) { + return 1; + } + if (nregs == 3 && a == 1) { + return 1; } + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); + if (nregs == 1) { + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + if (insn & (1 << 5)) { + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); + } + tcg_temp_free_i32(tmp); + } else { + /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ + stride = (insn & (1 << 5)) ? 2 : 1; + for (reg = 0; reg < nregs; reg++) { + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; + } + } + tcg_temp_free_i32(addr); stride = (1 << size) * nregs; } else { /* Single element. */ + int idx = (insn >> 4) & 0xf; pass = (insn >> 7) & 1; switch (size) { case 0: @@ -3856,18 +3949,52 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) abort(); } nregs = ((insn >> 8) & 3) + 1; - gen_movl_T1_reg(s, rn); + /* Catch the UNDEF cases. This is unavoidably a bit messy. */ + switch (nregs) { + case 1: + if (((idx & (1 << size)) != 0) || + (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { + return 1; + } + break; + case 3: + if ((idx & 1) != 0) { + return 1; + } + /* fall through */ + case 2: + if (size == 2 && (idx & 2) != 0) { + return 1; + } + break; + case 4: + if ((size == 2) && ((idx & 3) == 3)) { + return 1; + } + break; + default: + abort(); + } + if ((rd + stride * (nregs - 1)) > 31) { + /* Attempts to write off the end of the register file + * are UNPREDICTABLE; we choose to UNDEF because otherwise + * the neon_load_reg() would write off the end of the array. + */ + return 1; + } + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { switch (size) { case 0: - tmp = gen_ld8u(cpu_T[1], IS_USER(s)); + tmp = gen_ld8u(addr, IS_USER(s)); break; case 1: - tmp = gen_ld16u(cpu_T[1], IS_USER(s)); + tmp = gen_ld16u(addr, IS_USER(s)); break; case 2: - tmp = gen_ld32(cpu_T[1], IS_USER(s)); + tmp = gen_ld32(addr, IS_USER(s)); break; default: /* Avoid compiler warnings. */ abort(); @@ -3875,7 +4002,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) if (size != 2) { tmp2 = neon_load_reg(rd, pass); gen_bfi(tmp, tmp2, tmp, shift, size ? 
0xffff : 0xff); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } neon_store_reg(rd, pass, tmp); } else { /* Store */ @@ -3884,19 +4011,20 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shri_i32(tmp, tmp, shift); switch (size) { case 0: - gen_st8(tmp, cpu_T[1], IS_USER(s)); + gen_st8(tmp, addr, IS_USER(s)); break; case 1: - gen_st16(tmp, cpu_T[1], IS_USER(s)); + gen_st16(tmp, addr, IS_USER(s)); break; case 2: - gen_st32(tmp, cpu_T[1], IS_USER(s)); + gen_st32(tmp, addr, IS_USER(s)); break; } } rd += stride; - gen_op_addl_T1_im(1 << size); + tcg_gen_addi_i32(addr, addr, 1 << size); } + tcg_temp_free_i32(addr); stride = nregs * (1 << size); } } @@ -3910,7 +4038,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGv index; index = load_reg(s, rm); tcg_gen_add_i32(base, base, index); - dead_tmp(index); + tcg_temp_free_i32(index); } store_reg(s, rn, base); } @@ -3921,7 +4049,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c) { tcg_gen_and_i32(t, t, c); - tcg_gen_bic_i32(f, f, c); + tcg_gen_andc_i32(f, f, c); tcg_gen_or_i32(dest, t, f); } @@ -3938,9 +4066,9 @@ static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src) static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src) { switch (size) { - case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break; + case 0: gen_helper_neon_narrow_sat_s8(dest, src); break; + case 1: gen_helper_neon_narrow_sat_s16(dest, src); break; + case 2: gen_helper_neon_narrow_sat_s32(dest, src); break; default: abort(); } } @@ -3948,9 +4076,19 @@ static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src) static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src) { switch (size) { - case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break; + case 0: gen_helper_neon_narrow_sat_u8(dest, src); break; + case 1: gen_helper_neon_narrow_sat_u16(dest, src); break; + case 2: gen_helper_neon_narrow_sat_u32(dest, src); break; + default: abort(); + } +} + +static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_unarrow_sat8(dest, src); break; + case 1: gen_helper_neon_unarrow_sat16(dest, src); break; + case 2: gen_helper_neon_unarrow_sat32(dest, src); break; default: abort(); } } @@ -3975,8 +4113,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift, } else { if (u) { switch (size) { - case 1: gen_helper_neon_rshl_u16(var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(var, var, shift); break; + case 1: gen_helper_neon_shl_u16(var, var, shift); break; + case 2: gen_helper_neon_shl_u32(var, var, shift); break; default: abort(); } } else { @@ -4006,7 +4144,7 @@ static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u) default: abort(); } } - dead_tmp(src); + tcg_temp_free_i32(src); } static inline void gen_neon_addl(int size) @@ -4042,8 +4180,8 @@ static inline void gen_neon_negl(TCGv_i64 var, int size) static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) { switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); 
break; - case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; + case 1: gen_helper_neon_addl_saturate_s32(op0, op0, op1); break; + case 2: gen_helper_neon_addl_saturate_s64(op0, op0, op1); break; default: abort(); } } @@ -4060,19 +4198,216 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u) case 4: tmp = gen_muls_i64_i32(a, b); tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); break; case 5: tmp = gen_mulu_i64_i32(a, b); tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); break; default: abort(); } + + /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. + Don't forget to clean them now. */ if (size < 2) { - dead_tmp(b); - dead_tmp(a); + tcg_temp_free_i32(a); + tcg_temp_free_i32(b); + } +} + +static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src) +{ + if (op) { + if (u) { + gen_neon_unarrow_sats(size, dest, src); + } else { + gen_neon_narrow(size, dest, src); + } + } else { + if (u) { + gen_neon_narrow_satu(size, dest, src); + } else { + gen_neon_narrow_sats(size, dest, src); + } } } +/* Symbolic constants for op fields for Neon 3-register same-length. + * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B + * table A7-9. + */ +#define NEON_3R_VHADD 0 +#define NEON_3R_VQADD 1 +#define NEON_3R_VRHADD 2 +#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ +#define NEON_3R_VHSUB 4 +#define NEON_3R_VQSUB 5 +#define NEON_3R_VCGT 6 +#define NEON_3R_VCGE 7 +#define NEON_3R_VSHL 8 +#define NEON_3R_VQSHL 9 +#define NEON_3R_VRSHL 10 +#define NEON_3R_VQRSHL 11 +#define NEON_3R_VMAX 12 +#define NEON_3R_VMIN 13 +#define NEON_3R_VABD 14 +#define NEON_3R_VABA 15 +#define NEON_3R_VADD_VSUB 16 +#define NEON_3R_VTST_VCEQ 17 +#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VMUL 19 +#define NEON_3R_VPMAX 20 +#define NEON_3R_VPMIN 21 +#define NEON_3R_VQDMULH_VQRDMULH 22 +#define NEON_3R_VPADD 23 +#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ +#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ +#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ +#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ +#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ +#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */ + +static const uint8_t neon_3r_sizes[] = { + [NEON_3R_VHADD] = 0x7, + [NEON_3R_VQADD] = 0xf, + [NEON_3R_VRHADD] = 0x7, + [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ + [NEON_3R_VHSUB] = 0x7, + [NEON_3R_VQSUB] = 0xf, + [NEON_3R_VCGT] = 0x7, + [NEON_3R_VCGE] = 0x7, + [NEON_3R_VSHL] = 0xf, + [NEON_3R_VQSHL] = 0xf, + [NEON_3R_VRSHL] = 0xf, + [NEON_3R_VQRSHL] = 0xf, + [NEON_3R_VMAX] = 0x7, + [NEON_3R_VMIN] = 0x7, + [NEON_3R_VABD] = 0x7, + [NEON_3R_VABA] = 0x7, + [NEON_3R_VADD_VSUB] = 0xf, + [NEON_3R_VTST_VCEQ] = 0x7, + [NEON_3R_VML] = 0x7, + [NEON_3R_VMUL] = 0x7, + [NEON_3R_VPMAX] = 0x7, + [NEON_3R_VPMIN] = 0x7, + [NEON_3R_VQDMULH_VQRDMULH] = 0x6, + [NEON_3R_VPADD] = 0x7, + [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */ +}; + +/* Symbolic constants for op fields for Neon 2-register miscellaneous. 
+ * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B + * table A7-13. + */ +#define NEON_2RM_VREV64 0 +#define NEON_2RM_VREV32 1 +#define NEON_2RM_VREV16 2 +#define NEON_2RM_VPADDL 4 +#define NEON_2RM_VPADDL_U 5 +#define NEON_2RM_VCLS 8 +#define NEON_2RM_VCLZ 9 +#define NEON_2RM_VCNT 10 +#define NEON_2RM_VMVN 11 +#define NEON_2RM_VPADAL 12 +#define NEON_2RM_VPADAL_U 13 +#define NEON_2RM_VQABS 14 +#define NEON_2RM_VQNEG 15 +#define NEON_2RM_VCGT0 16 +#define NEON_2RM_VCGE0 17 +#define NEON_2RM_VCEQ0 18 +#define NEON_2RM_VCLE0 19 +#define NEON_2RM_VCLT0 20 +#define NEON_2RM_VABS 22 +#define NEON_2RM_VNEG 23 +#define NEON_2RM_VCGT0_F 24 +#define NEON_2RM_VCGE0_F 25 +#define NEON_2RM_VCEQ0_F 26 +#define NEON_2RM_VCLE0_F 27 +#define NEON_2RM_VCLT0_F 28 +#define NEON_2RM_VABS_F 30 +#define NEON_2RM_VNEG_F 31 +#define NEON_2RM_VSWP 32 +#define NEON_2RM_VTRN 33 +#define NEON_2RM_VUZP 34 +#define NEON_2RM_VZIP 35 +#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ +#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ +#define NEON_2RM_VSHLL 38 +#define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRECPE 56 +#define NEON_2RM_VRSQRTE 57 +#define NEON_2RM_VRECPE_F 58 +#define NEON_2RM_VRSQRTE_F 59 +#define NEON_2RM_VCVT_FS 60 +#define NEON_2RM_VCVT_FU 61 +#define NEON_2RM_VCVT_SF 62 +#define NEON_2RM_VCVT_UF 63 + +static int neon_2rm_is_float_op(int op) +{ + /* Return true if this neon 2reg-misc op is float-to-float */ + return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + op >= NEON_2RM_VRECPE_F); +} + +/* Each entry in this array has bit n set if the insn allows + * size value n (otherwise it will UNDEF). Since unallocated + * op values will have no bits set they always UNDEF. + */ +static const uint8_t neon_2rm_sizes[] = { + [NEON_2RM_VREV64] = 0x7, + [NEON_2RM_VREV32] = 0x3, + [NEON_2RM_VREV16] = 0x1, + [NEON_2RM_VPADDL] = 0x7, + [NEON_2RM_VPADDL_U] = 0x7, + [NEON_2RM_VCLS] = 0x7, + [NEON_2RM_VCLZ] = 0x7, + [NEON_2RM_VCNT] = 0x1, + [NEON_2RM_VMVN] = 0x1, + [NEON_2RM_VPADAL] = 0x7, + [NEON_2RM_VPADAL_U] = 0x7, + [NEON_2RM_VQABS] = 0x7, + [NEON_2RM_VQNEG] = 0x7, + [NEON_2RM_VCGT0] = 0x7, + [NEON_2RM_VCGE0] = 0x7, + [NEON_2RM_VCEQ0] = 0x7, + [NEON_2RM_VCLE0] = 0x7, + [NEON_2RM_VCLT0] = 0x7, + [NEON_2RM_VABS] = 0x7, + [NEON_2RM_VNEG] = 0x7, + [NEON_2RM_VCGT0_F] = 0x4, + [NEON_2RM_VCGE0_F] = 0x4, + [NEON_2RM_VCEQ0_F] = 0x4, + [NEON_2RM_VCLE0_F] = 0x4, + [NEON_2RM_VCLT0_F] = 0x4, + [NEON_2RM_VABS_F] = 0x4, + [NEON_2RM_VNEG_F] = 0x4, + [NEON_2RM_VSWP] = 0x1, + [NEON_2RM_VTRN] = 0x7, + [NEON_2RM_VUZP] = 0x7, + [NEON_2RM_VZIP] = 0x7, + [NEON_2RM_VMOVN] = 0x7, + [NEON_2RM_VQMOVN] = 0x7, + [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRECPE] = 0x4, + [NEON_2RM_VRSQRTE] = 0x4, + [NEON_2RM_VRECPE_F] = 0x4, + [NEON_2RM_VRSQRTE_F] = 0x4, + [NEON_2RM_VCVT_FS] = 0x4, + [NEON_2RM_VCVT_FU] = 0x4, + [NEON_2RM_VCVT_SF] = 0x4, + [NEON_2RM_VCVT_UF] = 0x4, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. 
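
Both size tables above feed the same kind of UNDEF check in the decoder below, e.g. "if ((neon_3r_sizes[op] & (1 << size)) == 0) return 1;". A minimal self-contained sketch of the idea (the function name here is illustrative, not from the patch):

    // each entry is a bitmap of legal 'size' values; unallocated op
    // values have no bits set, so they always UNDEF
    static int neon_op_size_ok(const uint8_t *sizes, int op, int size)
    {
        return (sizes[op] & (1 << size)) != 0;
    }
    // e.g. neon_op_size_ok(neon_2rm_sizes, NEON_2RM_VREV16, 0) -> 1
    //      (entry 0x1), while sizes 1 and 2 UNDEF
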
@@ -4089,14 +4424,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int count; int pairwise; int u; - int n; - uint32_t imm; - TCGv tmp; - TCGv tmp2; - TCGv tmp3; + uint32_t imm, mask; + TCGv tmp, tmp2, tmp3, tmp4, tmp5; TCGv_i64 tmp64; - if (!vfp_enabled(env)) + if (!s->vfp_enabled) return 1; q = (insn & (1 << 6)) != 0; u = (insn >> 24) & 1; @@ -4107,60 +4439,65 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if ((insn & (1 << 23)) == 0) { /* Three register same length. */ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9 - || op == 10 || op == 11 || op == 16)) { - /* 64-bit element instructions. */ + /* Catch invalid op and bad size combinations: UNDEF */ + if ((neon_3r_sizes[op] & (1 << size)) == 0) { + return 1; + } + /* All insns of this form UNDEF for either this condition or the + * superset of cases "Q==1"; we catch the latter later. + */ + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + if (size == 3 && op != NEON_3R_LOGIC) { + /* 64-bit element instructions. */ for (pass = 0; pass < (q ? 2 : 1); pass++) { neon_load_reg64(cpu_V0, rn + pass); neon_load_reg64(cpu_V1, rm + pass); switch (op) { - case 1: /* VQADD */ + case NEON_3R_VQADD: if (u) { - gen_helper_neon_add_saturate_u64(CPU_V001); + gen_helper_neon_qadd_u64(cpu_V0, cpu_V0, cpu_V1); } else { - gen_helper_neon_add_saturate_s64(CPU_V001); + gen_helper_neon_qadd_s64(cpu_V0, cpu_V0, cpu_V1); } break; - case 5: /* VQSUB */ + case NEON_3R_VQSUB: if (u) { - gen_helper_neon_sub_saturate_u64(CPU_V001); + gen_helper_neon_qsub_u64(cpu_V0, cpu_V0, cpu_V1); } else { - gen_helper_neon_sub_saturate_s64(CPU_V001); + gen_helper_neon_qsub_s64(cpu_V0, cpu_V0, cpu_V1); } break; - case 8: /* VSHL */ + case NEON_3R_VSHL: if (u) { gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); } else { gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 9: /* VQSHL */ + case NEON_3R_VQSHL: if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V0); + gen_helper_neon_qshl_u64(cpu_V0, cpu_V1, cpu_V0); } else { - gen_helper_neon_qshl_s64(cpu_V1, cpu_env, - cpu_V1, cpu_V0); + gen_helper_neon_qshl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 10: /* VRSHL */ + case NEON_3R_VRSHL: if (u) { gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); } else { gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 11: /* VQRSHL */ + case NEON_3R_VQRSHL: if (u) { - gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); + gen_helper_neon_qrshl_u64(cpu_V0, cpu_V1, cpu_V0); } else { - gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); + gen_helper_neon_qrshl_s64(cpu_V0, cpu_V1, cpu_V0); } break; - case 16: + case NEON_3R_VADD_VSUB: if (u) { tcg_gen_sub_i64(CPU_V001); } else { @@ -4174,301 +4511,327 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } return 0; } + pairwise = 0; switch (op) { - case 8: /* VSHL */ - case 9: /* VQSHL */ - case 10: /* VRSHL */ - case 11: /* VQRSHL */ + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: { int rtmp; /* Shift instruction operands are reversed. 
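
That is, these encodings keep the per-lane shift counts in the Vn operand and the data in Vm, while the shared per-pass loop below computes helper(value = rn operand, count = rm operand), so rn and rm are simply swapped up front. The counts are signed; a sketch of the resulting semantics:

    // after the swap, per lane: result = shl(value_from_Vm, count_from_Vn)
    // where a negative count shifts right, e.g. a lane count of -1 halves
    // an unsigned lane
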
*/ rtmp = rn; rn = rm; rm = rtmp; - pairwise = 0; } break; - case 20: /* VPMAX */ - case 21: /* VPMIN */ - case 23: /* VPADD */ + case NEON_3R_VPADD: + if (u) { + return 1; + } + /* Fall through */ + case NEON_3R_VPMAX: + case NEON_3R_VPMIN: pairwise = 1; break; - case 26: /* VPADD (float) */ - pairwise = (u && size < 2); + case NEON_3R_FLOAT_ARITH: + pairwise = (u && size < 2); /* if VPADD (float) */ + break; + case NEON_3R_FLOAT_MINMAX: + pairwise = u; /* if VPMIN/VPMAX (float) */ + break; + case NEON_3R_FLOAT_CMP: + if (!u && size) { + /* no encoding for U=0 C=1x */ + return 1; + } + break; + case NEON_3R_FLOAT_ACMP: + if (!u) { + return 1; + } + break; + case NEON_3R_VRECPS_VRSQRTS: + if (u) { + return 1; + } break; - case 30: /* VPMIN/VPMAX (float) */ - pairwise = u; + case NEON_3R_VMUL: + if (u && (size != 0)) { + /* UNDEF on invalid size for polynomial subcase */ + return 1; + } break; default: - pairwise = 0; break; } + + if (pairwise && q) { + /* All the pairwise insns UNDEF if Q is set */ + return 1; + } + for (pass = 0; pass < (q ? 4 : 2); pass++) { if (pairwise) { /* Pairwise. */ - if (q) - n = (pass & 1) * 2; - else - n = 0; - if (pass < q + 1) { - NEON_GET_REG(T0, rn, n); - NEON_GET_REG(T1, rn, n + 1); + if (pass < 1) { + tmp = neon_load_reg(rn, 0); + tmp2 = neon_load_reg(rn, 1); } else { - NEON_GET_REG(T0, rm, n); - NEON_GET_REG(T1, rm, n + 1); + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); } } else { /* Elementwise. */ - NEON_GET_REG(T0, rn, pass); - NEON_GET_REG(T1, rm, pass); + tmp = neon_load_reg(rn, pass); + tmp2 = neon_load_reg(rm, pass); } switch (op) { - case 0: /* VHADD */ + case NEON_3R_VHADD: GEN_NEON_INTEGER_OP(hadd); break; - case 1: /* VQADD */ - GEN_NEON_INTEGER_OP_ENV(qadd); + case NEON_3R_VQADD: + GEN_NEON_INTEGER_OP(qadd); break; - case 2: /* VRHADD */ + case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; - case 3: /* Logic ops. */ + case NEON_3R_LOGIC: /* Logic ops. 
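
For this row "size" is not an element size at all, which is why neon_3r_sizes[NEON_3R_LOGIC] above is 0xf: together with U it selects the operation in the switch that follows. The mapping, with the three bit-selects all built on the gen_neon_bsl() primitive defined earlier:

    // (u << 2) | size:  0 VAND   1 VBIC   2 VORR/VMOV   3 VORN
    //                   4 VEOR   5 VBSL   6 VBIT        7 VBIF
    // bit-select core:  dest = (t & c) | (f & ~c), with t/f/c permuted
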
*/ switch ((u << 2) | size) { case 0: /* VAND */ - gen_op_andl_T0_T1(); + tcg_gen_and_i32(tmp, tmp, tmp2); break; case 1: /* BIC */ - gen_op_bicl_T0_T1(); + tcg_gen_andc_i32(tmp, tmp, tmp2); break; case 2: /* VORR */ - gen_op_orl_T0_T1(); + tcg_gen_or_i32(tmp, tmp, tmp2); break; case 3: /* VORN */ - gen_op_notl_T1(); - gen_op_orl_T0_T1(); + tcg_gen_orc_i32(tmp, tmp, tmp2); break; case 4: /* VEOR */ - gen_op_xorl_T0_T1(); + tcg_gen_xor_i32(tmp, tmp, tmp2); break; case 5: /* VBSL */ - tmp = neon_load_reg(rd, pass); - gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp); - dead_tmp(tmp); + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp2, tmp3); + tcg_temp_free_i32(tmp3); break; case 6: /* VBIT */ - tmp = neon_load_reg(rd, pass); - gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]); - dead_tmp(tmp); + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp3, tmp2); + tcg_temp_free_i32(tmp3); break; case 7: /* VBIF */ - tmp = neon_load_reg(rd, pass); - gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]); - dead_tmp(tmp); + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp3, tmp, tmp2); + tcg_temp_free_i32(tmp3); break; } break; - case 4: /* VHSUB */ + case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case 5: /* VQSUB */ - GEN_NEON_INTEGER_OP_ENV(qsub); + case NEON_3R_VQSUB: + GEN_NEON_INTEGER_OP(qsub); break; - case 6: /* VCGT */ + case NEON_3R_VCGT: GEN_NEON_INTEGER_OP(cgt); break; - case 7: /* VCGE */ + case NEON_3R_VCGE: GEN_NEON_INTEGER_OP(cge); break; - case 8: /* VSHL */ + case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; - case 9: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); + case NEON_3R_VQSHL: + GEN_NEON_INTEGER_OP(qshl); break; - case 10: /* VRSHL */ + case NEON_3R_VRSHL: GEN_NEON_INTEGER_OP(rshl); break; - case 11: /* VQRSHL */ - GEN_NEON_INTEGER_OP_ENV(qrshl); + case NEON_3R_VQRSHL: + GEN_NEON_INTEGER_OP(qrshl); break; - case 12: /* VMAX */ + case NEON_3R_VMAX: GEN_NEON_INTEGER_OP(max); break; - case 13: /* VMIN */ + case NEON_3R_VMIN: GEN_NEON_INTEGER_OP(min); break; - case 14: /* VABD */ + case NEON_3R_VABD: GEN_NEON_INTEGER_OP(abd); break; - case 15: /* VABA */ + case NEON_3R_VABA: GEN_NEON_INTEGER_OP(abd); - NEON_GET_REG(T1, rd, pass); - gen_neon_add(size); + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); + gen_neon_add(size, tmp, tmp2); break; - case 16: + case NEON_3R_VADD_VSUB: if (!u) { /* VADD */ - if (gen_neon_add(size)) - return 1; + gen_neon_add(size, tmp, tmp2); } else { /* VSUB */ switch (size) { - case 0: gen_helper_neon_sub_u8(CPU_T001); break; - case 1: gen_helper_neon_sub_u16(CPU_T001); break; - case 2: gen_op_subl_T0_T1(); break; - default: return 1; + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; + default: abort(); } } break; - case 17: + case NEON_3R_VTST_VCEQ: if (!u) { /* VTST */ switch (size) { - case 0: gen_helper_neon_tst_u8(CPU_T001); break; - case 1: gen_helper_neon_tst_u16(CPU_T001); break; - case 2: gen_helper_neon_tst_u32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; + default: abort(); } } else { /* VCEQ */ switch (size) { - case 0: gen_helper_neon_ceq_u8(CPU_T001); break; - case 1: gen_helper_neon_ceq_u16(CPU_T001); break; - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_ceq_u8(tmp, tmp, 
tmp2); break; + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; + default: abort(); } } break; - case 18: /* Multiply. */ + case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ switch (size) { - case 0: gen_helper_neon_mul_u8(CPU_T001); break; - case 1: gen_helper_neon_mul_u16(CPU_T001); break; - case 2: gen_op_mul_T0_T1(); break; - default: return 1; + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); } - NEON_GET_REG(T1, rd, pass); + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); if (u) { /* VMLS */ - gen_neon_rsb(size); + gen_neon_rsb(size, tmp, tmp2); } else { /* VMLA */ - gen_neon_add(size); + gen_neon_add(size, tmp, tmp2); } break; - case 19: /* VMUL */ + case NEON_3R_VMUL: if (u) { /* polynomial */ - gen_helper_neon_mul_p8(CPU_T001); + gen_helper_neon_mul_p8(tmp, tmp, tmp2); } else { /* Integer */ switch (size) { - case 0: gen_helper_neon_mul_u8(CPU_T001); break; - case 1: gen_helper_neon_mul_u16(CPU_T001); break; - case 2: gen_op_mul_T0_T1(); break; - default: return 1; + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); } } break; - case 20: /* VPMAX */ + case NEON_3R_VPMAX: GEN_NEON_INTEGER_OP(pmax); break; - case 21: /* VPMIN */ + case NEON_3R_VPMIN: GEN_NEON_INTEGER_OP(pmin); break; - case 22: /* Hultiply high. */ + case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ if (!u) { /* VQDMULH */ switch (size) { - case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break; - case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break; - default: return 1; + case 1: gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2); break; + default: abort(); } - } else { /* VQRDHMUL */ + } else { /* VQRDMULH */ switch (size) { - case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break; - case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break; - default: return 1; + case 1: gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); break; + default: abort(); } } break; - case 23: /* VPADD */ - if (u) - return 1; + case NEON_3R_VPADD: switch (size) { - case 0: gen_helper_neon_padd_u8(CPU_T001); break; - case 1: gen_helper_neon_padd_u16(CPU_T001); break; - case 2: gen_op_addl_T0_T1(); break; - default: return 1; + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; + default: abort(); } break; - case 26: /* Floating point arithnetic. */ + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ switch ((u << 2) | size) { case 0: /* VADD */ - gen_helper_neon_add_f32(CPU_T001); + gen_helper_neon_add_f32(tmp, tmp, tmp2); break; case 2: /* VSUB */ - gen_helper_neon_sub_f32(CPU_T001); + gen_helper_neon_sub_f32(tmp, tmp, tmp2); break; case 4: /* VPADD */ - gen_helper_neon_add_f32(CPU_T001); + gen_helper_neon_add_f32(tmp, tmp, tmp2); break; case 6: /* VABD */ - gen_helper_neon_abd_f32(CPU_T001); + gen_helper_neon_abd_f32(tmp, tmp, tmp2); break; default: - return 1; + abort(); } break; - case 27: /* Float multiply. 
*/ - gen_helper_neon_mul_f32(CPU_T001); + case NEON_3R_FLOAT_MULTIPLY: + gen_helper_neon_mul_f32(tmp, tmp, tmp2); if (!u) { - NEON_GET_REG(T1, rd, pass); + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); if (size == 0) { - gen_helper_neon_add_f32(CPU_T001); + gen_helper_neon_add_f32(tmp, tmp, tmp2); } else { - gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]); + gen_helper_neon_sub_f32(tmp, tmp2, tmp); } } break; - case 28: /* Float compare. */ + case NEON_3R_FLOAT_CMP: if (!u) { - gen_helper_neon_ceq_f32(CPU_T001); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2); } else { if (size == 0) - gen_helper_neon_cge_f32(CPU_T001); + gen_helper_neon_cge_f32(tmp, tmp, tmp2); else - gen_helper_neon_cgt_f32(CPU_T001); + gen_helper_neon_cgt_f32(tmp, tmp, tmp2); } break; - case 29: /* Float compare absolute. */ - if (!u) - return 1; + case NEON_3R_FLOAT_ACMP: if (size == 0) - gen_helper_neon_acge_f32(CPU_T001); + gen_helper_neon_acge_f32(tmp, tmp, tmp2); else - gen_helper_neon_acgt_f32(CPU_T001); + gen_helper_neon_acgt_f32(tmp, tmp, tmp2); break; - case 30: /* Float min/max. */ + case NEON_3R_FLOAT_MINMAX: if (size == 0) - gen_helper_neon_max_f32(CPU_T001); + gen_helper_neon_max_f32(tmp, tmp, tmp2); else - gen_helper_neon_min_f32(CPU_T001); + gen_helper_neon_min_f32(tmp, tmp, tmp2); break; - case 31: + case NEON_3R_VRECPS_VRSQRTS: if (size == 0) - gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); else - gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env); + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); break; default: abort(); } + tcg_temp_free_i32(tmp2); + /* Save the result. For elementwise operations we can put it straight into the destination register. For pairwise operations we have to be careful to avoid clobbering the source operands. */ if (pairwise && rd == rm) { - gen_neon_movl_scratch_T0(pass); + neon_store_scratch(pass, tmp); } else { - NEON_SET_REG(T0, rd, pass); + neon_store_reg(rd, pass, tmp); } } /* for pass */ if (pairwise && rd == rm) { for (pass = 0; pass < (q ? 4 : 2); pass++) { - gen_neon_movl_T0_scratch(pass); - NEON_SET_REG(T0, rd, pass); + tmp = neon_load_scratch(pass); + neon_store_reg(rd, pass, tmp); } } /* End of 3 register same size operations. */ @@ -4477,7 +4840,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two registers and shift. */ op = (insn >> 8) & 0xf; if (insn & (1 << 7)) { - /* 64-bit shift. */ + /* 64-bit shift. */ + if (op > 7) { + return 1; + } size = 3; } else { size = 2; @@ -4490,6 +4856,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (op < 8) { /* Shift by immediate: VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ + if (q && ((rd | rm) & 1)) { + return 1; + } + if (!u && (op == 4 || op == 6)) { + return 1; + } /* Right shifts are encoded as N - shift, where N is the element size in bits. 
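
A worked instance of that encoding, assuming 16-bit elements (N = 16): a right shift by 5 is stored in the immediate field as 16 - 5 = 11, and the decode undoes this; the amount is then carried internally as a negative count (hence masks like 0xff >> -shift in the insert handling further down), since the Neon shift helpers treat negative per-lane counts as right shifts.
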
*/ if (op <= 4) @@ -4537,36 +4909,47 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); break; case 4: /* VSRI */ - if (!u) - return 1; - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); - break; case 5: /* VSHL, VSLI */ gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); break; - case 6: /* VQSHL */ - if (u) - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); - else - gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1); + case 6: /* VQSHLU */ + gen_helper_neon_qshlu_s64(cpu_V0, cpu_V0, cpu_V1); break; - case 7: /* VQSHLU */ - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1); + case 7: /* VQSHL */ + if (u) { + gen_helper_neon_qshl_u64(cpu_V0, + cpu_V0, cpu_V1); + } else { + gen_helper_neon_qshl_s64(cpu_V0, + cpu_V0, cpu_V1); + } break; } if (op == 1 || op == 3) { /* Accumulate. */ - neon_load_reg64(cpu_V0, rd + pass); + neon_load_reg64(cpu_V1, rd + pass); tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); } else if (op == 4 || (op == 5 && u)) { /* Insert */ - cpu_abort(env, "VS[LR]I.64 not implemented"); + neon_load_reg64(cpu_V1, rd + pass); + uint64_t mask; + if (shift < -63 || shift > 63) { + mask = 0; + } else { + if (op == 4) { + mask = 0xffffffffffffffffull >> -shift; + } else { + mask = 0xffffffffffffffffull << shift; + } + } + tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); } neon_store_reg64(cpu_V0, rd + pass); } else { /* size < 3 */ /* Operands in T0 and T1. */ - gen_op_movl_T1_im(imm); - NEON_GET_REG(T0, rm, pass); + tmp = neon_load_reg(rm, pass); + tmp2 = tcg_const_i32(imm); switch (op) { case 0: /* VSHR */ case 1: /* VSRA */ @@ -4577,138 +4960,158 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) GEN_NEON_INTEGER_OP(rshl); break; case 4: /* VSRI */ - if (!u) - return 1; - GEN_NEON_INTEGER_OP(shl); - break; case 5: /* VSHL, VSLI */ switch (size) { - case 0: gen_helper_neon_shl_u8(CPU_T001); break; - case 1: gen_helper_neon_shl_u16(CPU_T001); break; - case 2: gen_helper_neon_shl_u32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; + default: abort(); } break; - case 6: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case 7: /* VQSHLU */ + case 6: /* VQSHLU */ switch (size) { - case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break; - case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break; - case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break; - default: return 1; + case 0: + gen_helper_neon_qshlu_s8(tmp, tmp, tmp2); + break; + case 1: + gen_helper_neon_qshlu_s16(tmp, tmp, tmp2); + break; + case 2: + gen_helper_neon_qshlu_s32(tmp, tmp, tmp2); + break; + default: + abort(); } break; + case 7: /* VQSHL */ + GEN_NEON_INTEGER_OP(qshl); + break; } + tcg_temp_free_i32(tmp2); if (op == 1 || op == 3) { /* Accumulate. 
*/ - NEON_GET_REG(T1, rd, pass); - gen_neon_add(size); + tmp2 = neon_load_reg(rd, pass); + gen_neon_add(size, tmp, tmp2); + tcg_temp_free_i32(tmp2); } else if (op == 4 || (op == 5 && u)) { /* Insert */ switch (size) { case 0: if (op == 4) - imm = 0xff >> -shift; + mask = 0xff >> -shift; else - imm = (uint8_t)(0xff << shift); - imm |= imm << 8; - imm |= imm << 16; + mask = (uint8_t)(0xff << shift); + mask |= mask << 8; + mask |= mask << 16; break; case 1: if (op == 4) - imm = 0xffff >> -shift; + mask = 0xffff >> -shift; else - imm = (uint16_t)(0xffff << shift); - imm |= imm << 16; + mask = (uint16_t)(0xffff << shift); + mask |= mask << 16; break; case 2: - if (op == 4) - imm = 0xffffffffu >> -shift; - else - imm = 0xffffffffu << shift; + if (shift < -31 || shift > 31) { + mask = 0; + } else { + if (op == 4) + mask = 0xffffffffu >> -shift; + else + mask = 0xffffffffu << shift; + } break; default: abort(); } - tmp = neon_load_reg(rd, pass); - tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm); - tcg_gen_andi_i32(tmp, tmp, ~imm); - tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp); + tmp2 = neon_load_reg(rd, pass); + tcg_gen_andi_i32(tmp, tmp, mask); + tcg_gen_andi_i32(tmp2, tmp2, ~mask); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); } - NEON_SET_REG(T0, rd, pass); + neon_store_reg(rd, pass, tmp); } } /* for pass */ } else if (op < 10) { /* Shift by immediate and narrow: VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ + int input_unsigned = (op == 8) ? !u : u; + if (rm & 1) { + return 1; + } shift = shift - (1 << (size + 3)); size++; - switch (size) { - case 1: - imm = (uint16_t)shift; - imm |= imm << 16; - tmp2 = tcg_const_i32(imm); - TCGV_UNUSED_I64(tmp64); - break; - case 2: - imm = (uint32_t)shift; - tmp2 = tcg_const_i32(imm); - TCGV_UNUSED_I64(tmp64); - break; - case 3: + if (size == 3) { tmp64 = tcg_const_i64(shift); - TCGV_UNUSED(tmp2); - break; - default: - abort(); - } - - for (pass = 0; pass < 2; pass++) { - if (size == 3) { - neon_load_reg64(cpu_V0, rm + pass); + neon_load_reg64(cpu_V0, rm); + neon_load_reg64(cpu_V1, rm + 1); + for (pass = 0; pass < 2; pass++) { + TCGv_i64 in; + if (pass == 0) { + in = cpu_V0; + } else { + in = cpu_V1; + } if (q) { - if (u) - gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64); + if (input_unsigned) { + gen_helper_neon_rshl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_rshl_s64(cpu_V0, in, tmp64); + } } else { - if (u) - gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64); - else - gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64); + if (input_unsigned) { + gen_helper_neon_shl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_shl_s64(cpu_V0, in, tmp64); + } } + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); + neon_store_reg(rd, pass, tmp); + } /* for pass */ + tcg_temp_free_i64(tmp64); + } else { + if (size == 1) { + imm = (uint16_t)shift; + imm |= imm << 16; } else { - tmp = neon_load_reg(rm + pass, 0); - gen_neon_shift_narrow(size, tmp, tmp2, q, u); - tmp3 = neon_load_reg(rm + pass, 1); - gen_neon_shift_narrow(size, tmp3, tmp2, q, u); - tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); - dead_tmp(tmp); - dead_tmp(tmp3); - } - tmp = new_tmp(); - if (op == 8 && !u) { - gen_neon_narrow(size - 1, tmp, cpu_V0); - } else { - if (op == 8) - gen_neon_narrow_sats(size - 1, tmp, cpu_V0); - else - gen_neon_narrow_satu(size - 1, tmp, cpu_V0); - } - if (pass == 0) { - tmp2 = tmp; - } else { - neon_store_reg(rd, 0, tmp2); - neon_store_reg(rd, 1, tmp); + /* size == 2 */ + imm = (uint32_t)shift; 
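
    // a worked instance of the constant built above (values illustrative):
    //   size == 1, shift == -3 (i.e. a narrowing right shift by 3):
    //     (uint16_t)shift   -> 0xfffd
    //     imm |= imm << 16  -> 0xfffdfffd
    // so a single 32-bit helper call applies the same signed count to both
    // 16-bit lanes; for size == 2 the 32-bit count is used unreplicated
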
+                    }
+                    tmp2 = tcg_const_i32(imm);
+                    tmp4 = neon_load_reg(rm + 1, 0);
+                    tmp5 = neon_load_reg(rm + 1, 1);
+                    for (pass = 0; pass < 2; pass++) {
+                        if (pass == 0) {
+                            tmp = neon_load_reg(rm, 0);
+                        } else {
+                            tmp = tmp4;
+                        }
+                        gen_neon_shift_narrow(size, tmp, tmp2, q,
+                                              input_unsigned);
+                        if (pass == 0) {
+                            tmp3 = neon_load_reg(rm, 1);
+                        } else {
+                            tmp3 = tmp5;
+                        }
+                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
+                                              input_unsigned);
+                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
+                        tcg_temp_free_i32(tmp);
+                        tcg_temp_free_i32(tmp3);
+                        tmp = tcg_temp_new_i32();
+                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
+                        neon_store_reg(rd, pass, tmp);
+                    } /* for pass */
+                    tcg_temp_free_i32(tmp2);
+                }
            } else if (op == 10) {
-                /* VSHLL */
-                if (q || size == 3)
+                /* VSHLL, VMOVL */
+                if (q || (rd & 1)) {
                    return 1;
+                }
                tmp = neon_load_reg(rm, 0);
                tmp2 = neon_load_reg(rm, 1);
                for (pass = 0; pass < 2; pass++) {
@@ -4721,34 +5124,53 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                        /* The shift is less than the width of the source
                           type, so we can just shift the whole register.  */
                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
+                        /* Widen the result of shift: we need to clear
+                         * the potential overflow bits resulting from
+                         * left bits of the narrow input appearing as
+                         * right bits of the left neighbour narrow
+                         * input. */
                        if (size < 2 || !u) {
                            uint64_t imm64;
                            if (size == 0) {
                                imm = (0xffu >> (8 - shift));
                                imm |= imm << 16;
-                            } else {
+                            } else if (size == 1) {
                                imm = 0xffff >> (16 - shift);
+                            } else {
+                                /* size == 2 */
+                                imm = 0xffffffff >> (32 - shift);
+                            }
+                            if (size < 2) {
+                                imm64 = imm | (((uint64_t)imm) << 32);
+                            } else {
+                                imm64 = imm;
                            }
-                            imm64 = imm | (((uint64_t)imm) << 32);
-                            tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
+                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
                        }
                    }
                    neon_store_reg64(cpu_V0, rd + pass);
                }
-            } else if (op == 15 || op == 16) {
+            } else if (op >= 14) {
                /* VCVT fixed-point.  */
+                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
+                    return 1;
+                }
+                /* We have already masked out the must-be-1 top bit of imm6,
+                 * hence this 32-shift where the ARM ARM has 64-imm6.
+                 */
+                shift = 32 - shift;
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
-                    if (op & 1) {
+                    if (!(op & 1)) {
                        if (u)
-                            gen_vfp_ulto(0, shift);
+                            gen_vfp_ulto(0, shift, 1);
                        else
-                            gen_vfp_slto(0, shift);
+                            gen_vfp_slto(0, shift, 1);
                    } else {
                        if (u)
-                            gen_vfp_toul(0, shift);
+                            gen_vfp_toul(0, shift, 1);
                        else
-                            gen_vfp_tosl(0, shift);
+                            gen_vfp_tosl(0, shift, 1);
                    }
                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
                }
@@ -4757,11 +5179,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
            }
        } else { /* (insn & 0x00380080) == 0 */
            int invert;
+            if (q && (rd & 1)) {
+                return 1;
+            }
            op = (insn >> 8) & 0xf;
            /* One register and immediate.  */
            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
            invert = (insn & (1 << 5)) != 0;
+            /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
+             * We choose to not special-case this and will behave as if a
+             * valid constant encoding of 0 had been given.
+ */ switch (op) { case 0: case 1: /* no-op */ @@ -4782,7 +5211,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) imm = (imm << 8) | (imm << 24); break; case 12: - imm = (imm < 8) | 0xff; + imm = (imm << 8) | 0xff; break; case 13: imm = (imm << 16) | 0xffff; @@ -4793,6 +5222,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) imm = ~imm; break; case 15: + if (invert) { + return 1; + } imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); break; @@ -4800,9 +5232,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (invert) imm = ~imm; - if (op != 14 || !invert) - gen_op_movl_T1_im(imm); - for (pass = 0; pass < (q ? 4 : 2); pass++) { if (op & 1 && op < 12) { tmp = neon_load_reg(rd, pass); @@ -4815,8 +5244,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } else { /* VMOV, VMVN. */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (op == 14 && invert) { + int n; uint32_t val; val = 0; for (n = 0; n < 4; n++) { @@ -4839,41 +5269,57 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) int src1_wide; int src2_wide; int prewiden; - /* prewiden, src1_wide, src2_wide */ - static const int neon_3reg_wide[16][3] = { - {1, 0, 0}, /* VADDL */ - {1, 1, 0}, /* VADDW */ - {1, 0, 0}, /* VSUBL */ - {1, 1, 0}, /* VSUBW */ - {0, 1, 1}, /* VADDHN */ - {0, 0, 0}, /* VABAL */ - {0, 1, 1}, /* VSUBHN */ - {0, 0, 0}, /* VABDL */ - {0, 0, 0}, /* VMLAL */ - {0, 0, 0}, /* VQDMLAL */ - {0, 0, 0}, /* VMLSL */ - {0, 0, 0}, /* VQDMLSL */ - {0, 0, 0}, /* Integer VMULL */ - {0, 0, 0}, /* VQDMULL */ - {0, 0, 0} /* Polynomial VMULL */ + /* undefreq: bit 0 : UNDEF if size != 0 + * bit 1 : UNDEF if size == 0 + * bit 2 : UNDEF if U == 1 + * Note that [1:0] set implies 'always UNDEF' + */ + int undefreq; + /* prewiden, src1_wide, src2_wide, undefreq */ + static const int neon_3reg_wide[16][4] = { + {1, 0, 0, 0}, /* VADDL */ + {1, 1, 0, 0}, /* VADDW */ + {1, 0, 0, 0}, /* VSUBL */ + {1, 1, 0, 0}, /* VSUBW */ + {0, 1, 1, 0}, /* VADDHN */ + {0, 0, 0, 0}, /* VABAL */ + {0, 1, 1, 0}, /* VSUBHN */ + {0, 0, 0, 0}, /* VABDL */ + {0, 0, 0, 0}, /* VMLAL */ + {0, 0, 0, 6}, /* VQDMLAL */ + {0, 0, 0, 0}, /* VMLSL */ + {0, 0, 0, 6}, /* VQDMLSL */ + {0, 0, 0, 0}, /* Integer VMULL */ + {0, 0, 0, 2}, /* VQDMULL */ + {0, 0, 0, 5}, /* Polynomial VMULL */ + {0, 0, 0, 3}, /* Reserved: always UNDEF */ }; prewiden = neon_3reg_wide[op][0]; src1_wide = neon_3reg_wide[op][1]; src2_wide = neon_3reg_wide[op][2]; + undefreq = neon_3reg_wide[op][3]; - if (size == 0 && (op == 9 || op == 11 || op == 13)) + if (((undefreq & 1) && (size != 0)) || + ((undefreq & 2) && (size == 0)) || + ((undefreq & 4) && u)) { + return 1; + } + if ((src1_wide && (rn & 1)) || + (src2_wide && (rm & 1)) || + (!src2_wide && (rd & 1))) { return 1; + } /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide destinations in problematic ways. 
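
Concretely, the hazard that remains is a narrow source doubling as the destination: each pass of a widening op stores a full 64-bit D register, so with rd == rm pass 0's store would overwrite the high half of rm that pass 1 still needs. The code below therefore parks that half in scratch slot 2 first, using the neon_store_scratch()/neon_load_scratch() pair introduced earlier in this patch:

    // pass 0: 64-bit result -> D[rd] (== D[rm]), clobbering rm's half 1
    // so beforehand: scratch[2] = rm half 1; pass 1 reads it back
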
*/ if (rd == rm && !src2_wide) { - NEON_GET_REG(T0, rm, 1); - gen_neon_movl_scratch_T0(2); + tmp = neon_load_reg(rm, 1); + neon_store_scratch(2, tmp); } else if (rd == rn && !src1_wide) { - NEON_GET_REG(T0, rn, 1); - gen_neon_movl_scratch_T0(2); + tmp = neon_load_reg(rn, 1); + neon_store_scratch(2, tmp); } TCGV_UNUSED(tmp3); for (pass = 0; pass < 2; pass++) { @@ -4882,9 +5328,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGV_UNUSED(tmp); } else { if (pass == 1 && rd == rn) { - gen_neon_movl_T0_scratch(2); - tmp = new_tmp(); - tcg_gen_mov_i32(tmp, cpu_T[0]); + tmp = neon_load_scratch(2); } else { tmp = neon_load_reg(rn, pass); } @@ -4897,9 +5341,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) TCGV_UNUSED(tmp2); } else { if (pass == 1 && rd == rm) { - gen_neon_movl_T0_scratch(2); - tmp2 = new_tmp(); - tcg_gen_mov_i32(tmp2, cpu_T[0]); + tmp2 = neon_load_scratch(2); } else { tmp2 = neon_load_reg(rm, pass); } @@ -4911,7 +5353,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ gen_neon_addl(size); break; - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */ + case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ gen_neon_subl(size); break; case 5: case 7: /* VABAL, VABDL */ @@ -4936,49 +5378,50 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) break; default: abort(); } - dead_tmp(tmp2); - dead_tmp(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 8: case 9: case 10: case 11: case 12: case 13: /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ gen_neon_mull(cpu_V0, tmp, tmp2, size, u); break; case 14: /* Polynomial VMULL */ - cpu_abort(env, "Polynomial VMULL not implemented"); - - default: /* 15 is RESERVED. */ - return 1; + gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + default: /* 15 is RESERVED: caught earlier */ + abort(); } - if (op == 5 || op == 13 || (op >= 8 && op <= 11)) { + if (op == 13) { + /* VQDMULL */ + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + neon_store_reg64(cpu_V0, rd + pass); + } else if (op == 5 || (op >= 8 && op <= 11)) { /* Accumulate. */ - if (op == 10 || op == 11) { - gen_neon_negl(cpu_V0, size); - } - - if (op != 13) { - neon_load_reg64(cpu_V1, rd + pass); - } - + neon_load_reg64(cpu_V1, rd + pass); switch (op) { - case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */ + case 10: /* VMLSL */ + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 5: case 8: /* VABAL, VMLAL */ gen_neon_addl(size); break; case 9: case 11: /* VQDMLAL, VQDMLSL */ gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 11) { + gen_neon_negl(cpu_V0, size); + } gen_neon_addl_saturate(cpu_V0, cpu_V1, size); break; - /* Fall through. */ - case 13: /* VQDMULL */ - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - break; default: abort(); } neon_store_reg64(cpu_V0, rd + pass); } else if (op == 4 || op == 6) { /* Narrowing operation. */ - tmp = new_tmp(); - if (u) { + tmp = tcg_temp_new_i32(); + if (!u) { switch (size) { case 0: gen_helper_neon_narrow_high_u8(tmp, cpu_V0); @@ -5020,101 +5463,124 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } } else { - /* Two registers and a scalar. */ + /* Two registers and a scalar. NB that for ops of this form + * the ARM ARM labels bit 24 as Q, but it is in our variable + * 'u', not 'q'. 
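
In practice this means u, not q, gates the quad forms throughout this block: the element loop below runs "u ? 4 : 2" passes, and the quad alignment UNDEF checks likewise test u, e.g. "u && ((rd | rn) & 1)" in the VMLA/VMLS/VMUL group.
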
+             */
+            if (size == 0) {
+                return 1;
+            }
            switch (op) {
-            case 0: /* Integer VMLA scalar */
            case 1: /* Float VMLA scalar */
-            case 4: /* Integer VMLS scalar */
            case 5: /* Floating point VMLS scalar */
-            case 8: /* Integer VMUL scalar */
            case 9: /* Floating point VMUL scalar */
+                if (size == 1) {
+                    return 1;
+                }
+                /* fall through */
+            case 0: /* Integer VMLA scalar */
+            case 4: /* Integer VMLS scalar */
+            case 8: /* Integer VMUL scalar */
            case 12: /* VQDMULH scalar */
            case 13: /* VQRDMULH scalar */
-                gen_neon_get_scalar(size, rm);
-                gen_neon_movl_scratch_T0(0);
+                if (u && ((rd | rn) & 1)) {
+                    return 1;
+                }
+                tmp = neon_get_scalar(size, rm);
+                neon_store_scratch(0, tmp);
                for (pass = 0; pass < (u ? 4 : 2); pass++) {
-                    if (pass != 0)
-                        gen_neon_movl_T0_scratch(0);
-                    NEON_GET_REG(T1, rn, pass);
+                    tmp = neon_load_scratch(0);
+                    tmp2 = neon_load_reg(rn, pass);
                    if (op == 12) {
                        if (size == 1) {
-                            gen_helper_neon_qdmulh_s16(CPU_T0E01);
+                            gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2);
                        } else {
-                            gen_helper_neon_qdmulh_s32(CPU_T0E01);
+                            gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2);
                        }
                    } else if (op == 13) {
                        if (size == 1) {
-                            gen_helper_neon_qrdmulh_s16(CPU_T0E01);
+                            gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2);
                        } else {
-                            gen_helper_neon_qrdmulh_s32(CPU_T0E01);
+                            gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2);
                        }
                    } else if (op & 1) {
-                        gen_helper_neon_mul_f32(CPU_T001);
+                        gen_helper_neon_mul_f32(tmp, tmp, tmp2);
                    } else {
                        switch (size) {
-                        case 0: gen_helper_neon_mul_u8(CPU_T001); break;
-                        case 1: gen_helper_neon_mul_u16(CPU_T001); break;
-                        case 2: gen_op_mul_T0_T1(); break;
-                        default: return 1;
+                        case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+                        case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+                        case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+                        default: abort();
                        }
                    }
+                    tcg_temp_free_i32(tmp2);
                    if (op < 8) {
                        /* Accumulate.  */
-                        NEON_GET_REG(T1, rd, pass);
+                        tmp2 = neon_load_reg(rd, pass);
                        switch (op) {
                        case 0:
-                            gen_neon_add(size);
+                            gen_neon_add(size, tmp, tmp2);
                            break;
                        case 1:
-                            gen_helper_neon_add_f32(CPU_T001);
+                            gen_helper_neon_add_f32(tmp, tmp, tmp2);
                            break;
                        case 4:
-                            gen_neon_rsb(size);
+                            gen_neon_rsb(size, tmp, tmp2);
                            break;
                        case 5:
-                            gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
+                            gen_helper_neon_sub_f32(tmp, tmp2, tmp);
                            break;
                        default:
                            abort();
                        }
+                        tcg_temp_free_i32(tmp2);
                    }
-                    NEON_SET_REG(T0, rd, pass);
+                    neon_store_reg(rd, pass, tmp);
                }
                break;
-            case 2: /* VMLAL sclar */
            case 3: /* VQDMLAL scalar */
-            case 6: /* VMLSL scalar */
            case 7: /* VQDMLSL scalar */
-            case 10: /* VMULL scalar */
            case 11: /* VQDMULL scalar */
-                if (size == 0 && (op == 3 || op == 7 || op == 11))
+                if (u == 1) {
                    return 1;
-
-                gen_neon_get_scalar(size, rm);
-                NEON_GET_REG(T1, rn, 1);
+                }
+                /* fall through */
+            case 2: /* VMLAL scalar */
+            case 6: /* VMLSL scalar */
+            case 10: /* VMULL scalar */
+                if (rd & 1) {
+                    return 1;
+                }
+                tmp2 = neon_get_scalar(size, rm);
+                /* We need a copy of tmp2 because gen_neon_mull
+                 * deletes it during pass 0.
*/ + tmp4 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp4, tmp2); + tmp3 = neon_load_reg(rn, 1); for (pass = 0; pass < 2; pass++) { if (pass == 0) { tmp = neon_load_reg(rn, 0); } else { - tmp = new_tmp(); - tcg_gen_mov_i32(tmp, cpu_T[1]); + tmp = tmp3; + tmp2 = tmp4; } - tmp2 = new_tmp(); - tcg_gen_mov_i32(tmp2, cpu_T[0]); gen_neon_mull(cpu_V0, tmp, tmp2, size, u); - if (op == 6 || op == 7) { - gen_neon_negl(cpu_V0, size); - } if (op != 11) { neon_load_reg64(cpu_V1, rd + pass); } switch (op) { - case 2: case 6: + case 6: + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 2: gen_neon_addl(size); break; case 3: case 7: gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 7) { + gen_neon_negl(cpu_V0, size); + } gen_neon_addl_saturate(cpu_V0, cpu_V1, size); break; case 10: @@ -5128,6 +5594,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } neon_store_reg64(cpu_V0, rd + pass); } + + break; default: /* 14 and 15 are RESERVED */ return 1; @@ -5137,11 +5605,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (!u) { /* Extract. */ imm = (insn >> 8) & 0xf; - count = q + 1; if (imm > 7 && !q) return 1; + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + if (imm == 0) { neon_load_reg64(cpu_V0, rn); if (q) { @@ -5173,6 +5644,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); tcg_gen_shri_i64(tmp64, tmp64, imm * 8); tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64); + tcg_temp_free_i64(tmp64); } else { /* BUGFIX */ neon_load_reg64(cpu_V0, rn); @@ -5189,37 +5661,40 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) /* Two register misc. */ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); size = (insn >> 18) & 3; + /* UNDEF for unknown op values and bad op-size combinations */ + if ((neon_2rm_sizes[op] & (1 << size)) == 0) { + return 1; + } + if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && + q && ((rm | rd) & 1)) { + return 1; + } switch (op) { - case 0: /* VREV64 */ - if (size == 3) - return 1; + case NEON_2RM_VREV64: for (pass = 0; pass < (q ? 
2 : 1); pass++) { - NEON_GET_REG(T0, rm, pass * 2); - NEON_GET_REG(T1, rm, pass * 2 + 1); + tmp = neon_load_reg(rm, pass * 2); + tmp2 = neon_load_reg(rm, pass * 2 + 1); switch (size) { - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; - case 1: gen_swap_half(cpu_T[0]); break; + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; + case 1: gen_swap_half(tmp); break; case 2: /* no-op */ break; default: abort(); } - NEON_SET_REG(T0, rd, pass * 2 + 1); + neon_store_reg(rd, pass * 2 + 1, tmp); if (size == 2) { - NEON_SET_REG(T1, rd, pass * 2); + neon_store_reg(rd, pass * 2, tmp2); } else { - gen_op_movl_T0_T1(); switch (size) { - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; - case 1: gen_swap_half(cpu_T[0]); break; + case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break; + case 1: gen_swap_half(tmp2); break; default: abort(); } - NEON_SET_REG(T0, rd, pass * 2); + neon_store_reg(rd, pass * 2, tmp2); } } break; - case 4: case 5: /* VPADDL */ - case 12: case 13: /* VPADAL */ - if (size == 3) - return 1; + case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: + case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: for (pass = 0; pass < q + 1; pass++) { tmp = neon_load_reg(rm, pass * 2); gen_neon_widen(cpu_V0, tmp, size, op & 1); @@ -5231,7 +5706,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 2: tcg_gen_add_i64(CPU_V001); break; default: abort(); } - if (op >= 12) { + if (op >= NEON_2RM_VPADAL) { /* Accumulate. */ neon_load_reg64(cpu_V1, rd + pass); gen_neon_addl(size); @@ -5239,85 +5714,40 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) neon_store_reg64(cpu_V0, rd + pass); } break; - case 33: /* VTRN */ + case NEON_2RM_VTRN: if (size == 2) { + int n; for (n = 0; n < (q ? 4 : 2); n += 2) { - NEON_GET_REG(T0, rm, n); - NEON_GET_REG(T1, rd, n + 1); - NEON_SET_REG(T1, rm, n); - NEON_SET_REG(T0, rd, n + 1); + tmp = neon_load_reg(rm, n); + tmp2 = neon_load_reg(rd, n + 1); + neon_store_reg(rm, n, tmp2); + neon_store_reg(rd, n + 1, tmp); } } else { goto elementwise; } break; - case 34: /* VUZP */ - /* Reg Before After - Rd A3 A2 A1 A0 B2 B0 A2 A0 - Rm B3 B2 B1 B0 B3 B1 A3 A1 - */ - if (size == 3) + case NEON_2RM_VUZP: + if (gen_neon_unzip(rd, rm, size, q)) { return 1; - gen_neon_unzip(rd, q, 0, size); - gen_neon_unzip(rm, q, 4, size); - if (q) { - static int unzip_order_q[8] = - {0, 2, 4, 6, 1, 3, 5, 7}; - for (n = 0; n < 8; n++) { - int reg = (n < 4) ? rd : rm; - gen_neon_movl_T0_scratch(unzip_order_q[n]); - NEON_SET_REG(T0, reg, n % 4); - } - } else { - static int unzip_order[4] = - {0, 4, 1, 5}; - for (n = 0; n < 4; n++) { - int reg = (n < 2) ? rd : rm; - gen_neon_movl_T0_scratch(unzip_order[n]); - NEON_SET_REG(T0, reg, n % 2); - } } break; - case 35: /* VZIP */ - /* Reg Before After - Rd A3 A2 A1 A0 B1 A1 B0 A0 - Rm B3 B2 B1 B0 B3 A3 B2 A2 - */ - if (size == 3) + case NEON_2RM_VZIP: + if (gen_neon_zip(rd, rm, size, q)) { return 1; - count = (q ? 4 : 2); - for (n = 0; n < count; n++) { - NEON_GET_REG(T0, rd, n); - NEON_GET_REG(T1, rd, n); - switch (size) { - case 0: gen_helper_neon_zip_u8(); break; - case 1: gen_helper_neon_zip_u16(); break; - case 2: /* no-op */; break; - default: abort(); - } - gen_neon_movl_scratch_T0(n * 2); - gen_neon_movl_scratch_T1(n * 2 + 1); - } - for (n = 0; n < count * 2; n++) { - int reg = (n < count) ? 
rd : rm; - gen_neon_movl_T0_scratch(n); - NEON_SET_REG(T0, reg, n % count); } break; - case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */ - if (size == 3) + case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: + /* also VQMOVUN; op field and mnemonics don't line up */ + if (rm & 1) { return 1; + } TCGV_UNUSED(tmp2); for (pass = 0; pass < 2; pass++) { neon_load_reg64(cpu_V0, rm + pass); - tmp = new_tmp(); - if (op == 36 && q == 0) { - gen_neon_narrow(size, tmp, cpu_V0); - } else if (q) { - gen_neon_narrow_satu(size, tmp, cpu_V0); - } else { - gen_neon_narrow_sats(size, tmp, cpu_V0); - } + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size, + tmp, cpu_V0); if (pass == 0) { tmp2 = tmp; } else { @@ -5326,243 +5756,317 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) } } break; - case 38: /* VSHLL */ - if (q || size == 3) + case NEON_2RM_VSHLL: + if (q || (rd & 1)) { return 1; + } tmp = neon_load_reg(rm, 0); tmp2 = neon_load_reg(rm, 1); for (pass = 0; pass < 2; pass++) { if (pass == 1) tmp = tmp2; gen_neon_widen(cpu_V0, tmp, size, 1); + tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size); neon_store_reg64(cpu_V0, rd + pass); } break; + case NEON_2RM_VCVT_F16_F32: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rm & 1)) { + return 1; + } + tmp = tcg_temp_new_i32(); + tmp2 = tcg_temp_new_i32(); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1)); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp2, tmp2, tmp); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2)); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3)); + neon_store_reg(rd, 0, tmp2); + tmp2 = tcg_temp_new_i32(); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp2, tmp2, tmp); + neon_store_reg(rd, 1, tmp2); + tcg_temp_free_i32(tmp); + break; + case NEON_2RM_VCVT_F32_F16: + if (!arm_feature(env, ARM_FEATURE_VFP_FP16) || + q || (rd & 1)) { + return 1; + } + tmp3 = tcg_temp_new_i32(); + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + tcg_gen_ext16u_i32(tmp3, tmp); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_shri_i32(tmp3, tmp, 16); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_ext16u_i32(tmp3, tmp2); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2)); + tcg_gen_shri_i32(tmp3, tmp2, 16); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3)); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + break; default: elementwise: for (pass = 0; pass < (q ? 
4 : 2); pass++) { - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); + TCGV_UNUSED(tmp); } else { - NEON_GET_REG(T0, rm, pass); + tmp = neon_load_reg(rm, pass); } switch (op) { - case 1: /* VREV32 */ + case NEON_2RM_VREV32: switch (size) { - case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break; - case 1: gen_swap_half(cpu_T[0]); break; - default: return 1; + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; + case 1: gen_swap_half(tmp); break; + default: abort(); } break; - case 2: /* VREV16 */ - if (size != 0) - return 1; - gen_rev16(cpu_T[0]); + case NEON_2RM_VREV16: + gen_rev16(tmp); break; - case 8: /* CLS */ + case NEON_2RM_VCLS: switch (size) { - case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break; - case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break; - case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break; - default: return 1; + case 0: gen_helper_neon_cls_s8(tmp, tmp); break; + case 1: gen_helper_neon_cls_s16(tmp, tmp); break; + case 2: gen_helper_neon_cls_s32(tmp, tmp); break; + default: abort(); } break; - case 9: /* CLZ */ + case NEON_2RM_VCLZ: switch (size) { - case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break; - case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break; - case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break; - default: return 1; + case 0: gen_helper_neon_clz_u8(tmp, tmp); break; + case 1: gen_helper_neon_clz_u16(tmp, tmp); break; + case 2: gen_helper_clz(tmp, tmp); break; + default: abort(); } break; - case 10: /* CNT */ - if (size != 0) - return 1; - gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]); + case NEON_2RM_VCNT: + gen_helper_neon_cnt_u8(tmp, tmp); break; - case 11: /* VNOT */ - if (size != 0) - return 1; - gen_op_notl_T0(); + case NEON_2RM_VMVN: + tcg_gen_not_i32(tmp, tmp); break; - case 14: /* VQABS */ + case NEON_2RM_VQABS: switch (size) { - case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break; - case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break; - case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break; - default: return 1; + case 0: gen_helper_neon_qabs_s8(tmp, tmp); break; + case 1: gen_helper_neon_qabs_s16(tmp, tmp); break; + case 2: gen_helper_neon_qabs_s32(tmp, tmp); break; + default: abort(); } break; - case 15: /* VQNEG */ + case NEON_2RM_VQNEG: switch (size) { - case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break; - case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break; - case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break; - default: return 1; + case 0: gen_helper_neon_qneg_s8(tmp, tmp); break; + case 1: gen_helper_neon_qneg_s16(tmp, tmp); break; + case 2: gen_helper_neon_qneg_s32(tmp, tmp); break; + default: abort(); } break; - case 16: case 19: /* VCGT #0, VCLE #0 */ - gen_op_movl_T1_im(0); + case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: + tmp2 = tcg_const_i32(0); switch(size) { - case 0: gen_helper_neon_cgt_s8(CPU_T001); break; - case 1: gen_helper_neon_cgt_s16(CPU_T001); break; - case 2: gen_helper_neon_cgt_s32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free(tmp2); + if (op == NEON_2RM_VCLE0) { + tcg_gen_not_i32(tmp, tmp); } - if (op == 19) - gen_op_notl_T0(); break; - case 17: case 20: /* VCGE #0, VCLT #0 */ - gen_op_movl_T1_im(0); + case 
NEON_2RM_VCGE0: case NEON_2RM_VCLT0: + tmp2 = tcg_const_i32(0); switch(size) { - case 0: gen_helper_neon_cge_s8(CPU_T001); break; - case 1: gen_helper_neon_cge_s16(CPU_T001); break; - case 2: gen_helper_neon_cge_s32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free(tmp2); + if (op == NEON_2RM_VCLT0) { + tcg_gen_not_i32(tmp, tmp); } - if (op == 20) - gen_op_notl_T0(); break; - case 18: /* VCEQ #0 */ - gen_op_movl_T1_im(0); + case NEON_2RM_VCEQ0: + tmp2 = tcg_const_i32(0); switch(size) { - case 0: gen_helper_neon_ceq_u8(CPU_T001); break; - case 1: gen_helper_neon_ceq_u16(CPU_T001); break; - case 2: gen_helper_neon_ceq_u32(CPU_T001); break; - default: return 1; + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; + default: abort(); } + tcg_temp_free(tmp2); break; - case 22: /* VABS */ + case NEON_2RM_VABS: switch(size) { - case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break; - case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break; - case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break; - default: return 1; + case 0: gen_helper_neon_abs_s8(tmp, tmp); break; + case 1: gen_helper_neon_abs_s16(tmp, tmp); break; + case 2: tcg_gen_abs_i32(tmp, tmp); break; + default: abort(); } break; - case 23: /* VNEG */ - gen_op_movl_T1_im(0); - if (size == 3) - return 1; - gen_neon_rsb(size); + case NEON_2RM_VNEG: + tmp2 = tcg_const_i32(0); + gen_neon_rsb(size, tmp, tmp2); + tcg_temp_free(tmp2); + break; + case NEON_2RM_VCGT0_F: + tmp2 = tcg_const_i32(0); + gen_helper_neon_cgt_f32(tmp, tmp, tmp2); + tcg_temp_free(tmp2); + break; + case NEON_2RM_VCGE0_F: + tmp2 = tcg_const_i32(0); + gen_helper_neon_cge_f32(tmp, tmp, tmp2); + tcg_temp_free(tmp2); break; - case 24: case 27: /* Float VCGT #0, Float VCLE #0 */ - gen_op_movl_T1_im(0); - gen_helper_neon_cgt_f32(CPU_T001); - if (op == 27) - gen_op_notl_T0(); + case NEON_2RM_VCEQ0_F: + tmp2 = tcg_const_i32(0); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2); + tcg_temp_free(tmp2); break; - case 25: case 28: /* Float VCGE #0, Float VCLT #0 */ - gen_op_movl_T1_im(0); - gen_helper_neon_cge_f32(CPU_T001); - if (op == 28) - gen_op_notl_T0(); + case NEON_2RM_VCLE0_F: + tmp2 = tcg_const_i32(0); + gen_helper_neon_cge_f32(tmp, tmp2, tmp); + tcg_temp_free(tmp2); break; - case 26: /* Float VCEQ #0 */ - gen_op_movl_T1_im(0); - gen_helper_neon_ceq_f32(CPU_T001); + case NEON_2RM_VCLT0_F: + tmp2 = tcg_const_i32(0); + gen_helper_neon_cgt_f32(tmp, tmp2, tmp); + tcg_temp_free(tmp2); break; - case 30: /* Float VABS */ + case NEON_2RM_VABS_F: gen_vfp_abs(0); break; - case 31: /* Float VNEG */ + case NEON_2RM_VNEG_F: gen_vfp_neg(0); break; - case 32: /* VSWP */ - NEON_GET_REG(T1, rd, pass); - NEON_SET_REG(T1, rm, pass); + case NEON_2RM_VSWP: + tmp2 = neon_load_reg(rd, pass); + neon_store_reg(rm, pass, tmp2); break; - case 33: /* VTRN */ - NEON_GET_REG(T1, rd, pass); + case NEON_2RM_VTRN: + tmp2 = neon_load_reg(rd, pass); switch (size) { - case 0: gen_helper_neon_trn_u8(); break; - case 1: gen_helper_neon_trn_u16(); break; - case 2: abort(); - default: return 1; + case 0: gen_neon_trn_u8(tmp, tmp2); break; + case 1: gen_neon_trn_u16(tmp, tmp2); break; + default: abort(); } - NEON_SET_REG(T1, rm, pass); + neon_store_reg(rm, pass, tmp2); break; - case 56: /* Integer VRECPE 
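
The compare-with-zero cases in this hunk lean on the usual NEON mask trick: each compare helper yields an all-ones/all-zeros per-element mask, so VCLE #0 and VCLT #0 are the bitwise NOT of the VCGT #0 and VCGE #0 masks, and the float VCLE0_F/VCLT0_F variants instead swap the operands of cge/cgt. A minimal scalar model of the signed-byte case (function names are illustrative, not from this patch):

    #include <stdint.h>

    /* Per-byte x > 0 producing 0xff/0x00 lanes, as the NEON helpers do. */
    static uint32_t cgt0_s8(uint32_t x)
    {
        uint32_t r = 0;
        for (int i = 0; i < 4; i++) {
            if ((int8_t)(x >> (8 * i)) > 0) {
                r |= 0xffu << (8 * i);
            }
        }
        return r;
    }

    /* x <= 0 is the lane-wise complement of x > 0. */
    static uint32_t cle0_s8(uint32_t x)
    {
        return ~cgt0_s8(x);
    }
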
*/ - gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env); + case NEON_2RM_VRECPE: + gen_helper_recpe_u32(tmp, tmp, cpu_env); break; - case 57: /* Integer VRSQRTE */ - gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env); + case NEON_2RM_VRSQRTE: + gen_helper_rsqrte_u32(tmp, tmp, cpu_env); break; - case 58: /* Float VRECPE */ + case NEON_2RM_VRECPE_F: gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 59: /* Float VRSQRTE */ + case NEON_2RM_VRSQRTE_F: gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env); break; - case 60: /* VCVT.F32.S32 */ - gen_vfp_tosiz(0); + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vfp_sito(0, 1); break; - case 61: /* VCVT.F32.U32 */ - gen_vfp_touiz(0); + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vfp_uito(0, 1); break; - case 62: /* VCVT.S32.F32 */ - gen_vfp_sito(0); + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vfp_tosiz(0, 1); break; - case 63: /* VCVT.U32.F32 */ - gen_vfp_uito(0); + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + gen_vfp_touiz(0, 1); break; default: - /* Reserved: 21, 29, 39-56 */ - return 1; + /* Reserved op values were caught by the + * neon_2rm_sizes[] check earlier. + */ + abort(); } - if (op == 30 || op == 31 || op >= 58) { + if (neon_2rm_is_float_op(op)) { tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); } else { - NEON_SET_REG(T0, rd, pass); + neon_store_reg(rd, pass, tmp); } } break; } } else if ((insn & (1 << 10)) == 0) { /* VTBL, VTBX. */ - n = ((insn >> 5) & 0x18) + 8; + int n = ((insn >> 8) & 3) + 1; + if ((rn + n) > 32) { + /* This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. + */ + return 1; + } + n <<= 3; if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 0); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } tmp2 = neon_load_reg(rm, 0); - gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn), - tcg_const_i32(n)); - dead_tmp(tmp); + tmp4 = tcg_const_i32(rn); + tmp5 = tcg_const_i32(n); + gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5); + tcg_temp_free_i32(tmp); if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 1); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } tmp3 = neon_load_reg(rm, 1); - gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn), - tcg_const_i32(n)); + gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5); + tcg_temp_free_i32(tmp5); + tcg_temp_free_i32(tmp4); neon_store_reg(rd, 0, tmp2); neon_store_reg(rd, 1, tmp3); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else if ((insn & 0x380) == 0) { /* VDUP */ + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { + return 1; + } if (insn & (1 << 19)) { - NEON_SET_REG(T0, rm, 1); + tmp = neon_load_reg(rm, 1); } else { - NEON_SET_REG(T0, rm, 0); + tmp = neon_load_reg(rm, 0); } if (insn & (1 << 16)) { - gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8); + gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); } else if (insn & (1 << 17)) { if ((insn >> 18) & 1) - gen_neon_dup_high16(cpu_T[0]); + gen_neon_dup_high16(tmp); else - gen_neon_dup_low16(cpu_T[0]); + gen_neon_dup_low16(tmp); } for (pass = 0; pass < (q ? 
4 : 2); pass++) { - NEON_SET_REG(T0, rd, pass); + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); + neon_store_reg(rd, pass, tmp2); } + tcg_temp_free_i32(tmp); } else { return 1; } @@ -5580,6 +6084,34 @@ static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn) int rt = (insn >> 12) & 0xf; TCGv tmp; + /* Minimal set of debug registers, since we don't support debug */ + if (op1 == 0 && crn == 0 && op2 == 0) { + switch (crm) { + case 0: + /* DBGDIDR: just RAZ. In particular this means the + * "debug architecture version" bits will read as + * a reserved value, which should cause Linux to + * not try to use the debug hardware. + */ + tmp = tcg_const_i32(0); + store_reg(s, rt, tmp); + return 0; + case 1: + case 2: + /* DBGDRAR and DBGDSAR: v7 only. Always RAZ since we + * don't implement memory mapped debug components + */ + if (ENABLE_ARCH_7) { + tmp = tcg_const_i32(0); + store_reg(s, rt, tmp); + return 0; + } + break; + default: + break; + } + } + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) { /* TEECR */ @@ -5612,6 +6144,30 @@ static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn) int rt = (insn >> 12) & 0xf; TCGv tmp; + /* Minimal set of debug registers, since we don't support debug */ + if (op1 == 0 && crn == 0 && op2 == 0) { + switch (crm) { + case 0: + /* DBGDIDR */ + tmp = load_cpu_field(cp14_dbgdidr); + store_reg(s, rt, tmp); + return 0; + case 1: + case 2: + /* DBGDRAR and DBGDSAR: v7 only. Always RAZ since we + * don't implement memory mapped debug components + */ + if (ENABLE_ARCH_7) { + tmp = tcg_const_i32(0); + store_reg(s, rt, tmp); + return 0; + } + break; + default: + break; + } + } + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) { /* TEECR */ @@ -5619,7 +6175,7 @@ static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn) return 1; tmp = load_reg(s, rt); gen_helper_set_teecr(cpu_env, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); return 0; } if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) { @@ -5653,7 +6209,7 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn) } else if (arm_feature(env, ARM_FEATURE_XSCALE)) { return disas_dsp_insn(env, s, insn); } - return 1; + goto board; case 10: case 11: return disas_vfp_insn (env, s, insn); @@ -5680,10 +6236,10 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn) static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) { TCGv tmp; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, val); store_reg(s, rlow, tmp); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_shri_i64(val, val, 32); tcg_gen_trunc_i64_i32(tmp, val); store_reg(s, rhigh, tmp); @@ -5699,8 +6255,9 @@ static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow) tmp = tcg_temp_new_i64(); tmp2 = load_reg(s, rlow); tcg_gen_extu_i32_i64(tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tcg_gen_add_i64(val, val, tmp); + tcg_temp_free_i64(tmp); } /* load and add a 64-bit value from a register pair. */ @@ -5715,20 +6272,145 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh) tmph = load_reg(s, rhigh); tmp = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(tmp, tmpl, tmph); - dead_tmp(tmpl); - dead_tmp(tmph); + tcg_temp_free_i32(tmpl); + tcg_temp_free_i32(tmph); tcg_gen_add_i64(val, val, tmp); + tcg_temp_free_i64(tmp); } /* Set N and Z flags from a 64-bit value. 
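
gen_storeq_reg and gen_addq above implement the core-register-pair convention for 64-bit values: the low word travels in rlow, the high word in rhigh. A rough plain-C sketch of that convention (hypothetical helpers, for illustration only):

    #include <stdint.h>

    /* Split a 64-bit result across a register pair, low word first. */
    static void storeq_model(uint64_t val, uint32_t *rlow, uint32_t *rhigh)
    {
        *rlow  = (uint32_t)val;
        *rhigh = (uint32_t)(val >> 32);
    }

    /* Load-and-add the 64-bit value held in a register pair. */
    static uint64_t addq_model(uint64_t val, uint32_t rlow, uint32_t rhigh)
    {
        return val + (((uint64_t)rhigh << 32) | rlow);
    }
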
 */
 static void gen_logicq_cc(TCGv_i64 val)
 {
-    TCGv tmp = new_tmp();
+    TCGv tmp = tcg_temp_new_i32();
     gen_helper_logicq_cc(tmp, val);
     gen_logic_CC(tmp);
-    dead_tmp(tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+/* Load/Store exclusive instructions are implemented by remembering
+   the value/address loaded, and seeing if these are the same
+   when the store is performed. This should be sufficient to implement
+   the architecturally mandated semantics, and avoids having to monitor
+   regular stores.
+
+   In system emulation mode only one CPU will be running at once, so
+   this sequence is effectively atomic. In user emulation mode we
+   throw an exception and handle the atomic operation elsewhere. */
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+                               TCGv addr, int size)
+{
+    TCGv tmp;
+
+    switch (size) {
+    case 0:
+        tmp = gen_ld8u(addr, IS_USER(s));
+        break;
+    case 1:
+        tmp = gen_ld16u(addr, IS_USER(s));
+        break;
+    case 2:
+    case 3:
+        tmp = gen_ld32(addr, IS_USER(s));
+        break;
+    default:
+        abort();
+    }
+    tcg_gen_mov_i32(cpu_exclusive_val, tmp);
+    store_reg(s, rt, tmp);
+    if (size == 3) {
+        TCGv tmp2 = tcg_temp_new_i32();
+        tcg_gen_addi_i32(tmp2, addr, 4);
+        tmp = gen_ld32(tmp2, IS_USER(s));
+        tcg_temp_free_i32(tmp2);
+        tcg_gen_mov_i32(cpu_exclusive_high, tmp);
+        store_reg(s, rt2, tmp);
+    }
+    tcg_gen_mov_i32(cpu_exclusive_addr, addr);
+}
+
+static void gen_clrex(DisasContext *s)
+{
+    tcg_gen_movi_i32(cpu_exclusive_addr, -1);
 }
 
+#ifdef CONFIG_USER_ONLY
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv addr, int size)
+{
+    tcg_gen_mov_i32(cpu_exclusive_test, addr);
+    tcg_gen_movi_i32(cpu_exclusive_info,
+                     size | (rd << 4) | (rt << 8) | (rt2 << 12));
+    gen_exception_insn(s, 4, EXCP_STREX);
+}
+#else
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+                                TCGv addr, int size)
+{
+    TCGv tmp;
+    int done_label;
+    int fail_label;
+
+    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
+         [addr] = {Rt};
+         {Rd} = 0;
+       } else {
+         {Rd} = 1;
+       } */
+    fail_label = gen_new_label();
+    done_label = gen_new_label();
+    tcg_gen_brcond_i32(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+    switch (size) {
+    case 0:
+        tmp = gen_ld8u(addr, IS_USER(s));
+        break;
+    case 1:
+        tmp = gen_ld16u(addr, IS_USER(s));
+        break;
+    case 2:
+    case 3:
+        tmp = gen_ld32(addr, IS_USER(s));
+        break;
+    default:
+        abort();
+    }
+    tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
+    tcg_temp_free_i32(tmp);
+    if (size == 3) {
+        TCGv tmp2 = tcg_temp_new_i32();
+        tcg_gen_addi_i32(tmp2, addr, 4);
+        tmp = gen_ld32(tmp2, IS_USER(s));
+        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
+        tcg_temp_free_i32(tmp);
+    }
+    tmp = load_reg(s, rt);
+    switch (size) {
+    case 0:
+        gen_st8(tmp, addr, IS_USER(s));
+        break;
+    case 1:
+        gen_st16(tmp, addr, IS_USER(s));
+        break;
+    case 2:
+    case 3:
+        gen_st32(tmp, addr, IS_USER(s));
+        break;
+    default:
+        abort();
+    }
+    if (size == 3) {
+        tcg_gen_addi_i32(addr, addr, 4);
+        tmp = load_reg(s, rt2);
+        gen_st32(tmp, addr, IS_USER(s));
+    }
+    tcg_gen_movi_i32(cpu_R[rd], 0);
+    tcg_gen_br(done_label);
+    gen_set_label(fail_label);
+    tcg_gen_movi_i32(cpu_R[rd], 1);
+    gen_set_label(done_label);
+    tcg_gen_movi_i32(cpu_exclusive_addr, -1);
+}
+#endif
 
 static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
@@ -5739,6 +6421,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
     TCGv tmp3;
     TCGv addr;
    TCGv_i64 tmp64;
+    insn = ldl_code(s->pc);
 
     ANDROID_WATCH_CALLSTACK_ARM(s);
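
The new gen_load_exclusive/gen_store_exclusive pair boils down to a single-CPU monitor model; a minimal sketch, assuming word-sized, word-aligned accesses, where the globals stand in for cpu_exclusive_addr/cpu_exclusive_val (all names illustrative):

    #include <stdint.h>

    static uint32_t excl_addr = ~0u;   /* stands in for cpu_exclusive_addr */
    static uint32_t excl_val;          /* stands in for cpu_exclusive_val  */

    /* LDREX: record the address/value pair and return the loaded value. */
    static uint32_t ldrex_model(const uint32_t *mem, uint32_t addr)
    {
        excl_addr = addr;
        excl_val = mem[addr / 4];
        return excl_val;
    }

    /* STREX: succeed (0) only if the monitor still matches; either way
     * the monitor is cleared, as the generated code does with -1. */
    static int strex_model(uint32_t *mem, uint32_t addr, uint32_t val)
    {
        int fail = (addr != excl_addr) || (mem[addr / 4] != excl_val);
        if (!fail) {
            mem[addr / 4] = val;
        }
        excl_addr = ~0u;
        return fail;
    }

Checking the remembered value as well as the address is what makes monitoring of regular stores unnecessary, per the comment above.
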
@@ -5752,6 +6435,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; cond = insn >> 28; if (cond == 0xf){ + /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we + * choose to UNDEF. In ARMv5 and above the space is used + * for miscellaneous unconditional instructions. + */ + ARCH(5); + ANDROID_TRACE_GEN_TICKS(); /* Unconditional instructions. */ if (((insn >> 25) & 7) == 1) { @@ -5772,9 +6461,32 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; return; } - if ((insn & 0x0d70f000) == 0x0550f000) - return; /* PLD */ - else if ((insn & 0x0ffffdff) == 0x01010000) { + if (((insn & 0x0f30f000) == 0x0510f000) || + ((insn & 0x0f30f010) == 0x0710f000)) { + if ((insn & (1 << 22)) == 0) { + /* PLDW; v7MP */ + if (!arm_feature(env, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + } + /* Otherwise PLD; v5TE+ */ + ARCH(5TE); + return; + } + if (((insn & 0x0f70f000) == 0x0450f000) || + ((insn & 0x0f70f010) == 0x0650f000)) { + ARCH(7); + return; /* PLI; V7 */ + } + if (((insn & 0x0f700000) == 0x04100000) || + ((insn & 0x0f700010) == 0x06100000)) { + if (!arm_feature(env, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + return; /* v7MP: Unallocated memory hint: must NOP */ + } + + if ((insn & 0x0ffffdff) == 0x01010000) { ARCH(6); /* setend */ if (insn & (1 << 9)) { @@ -5786,7 +6498,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) switch ((insn >> 4) & 0xf) { case 1: /* clrex */ ARCH(6K); - gen_helper_clrex(cpu_env); + gen_clrex(s); return; case 4: /* dsb */ case 5: /* dmb */ @@ -5799,22 +6511,20 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } } else if ((insn & 0x0e5fffe0) == 0x084d0500) { /* srs */ - uint32_t offset; + int32_t offset; if (IS_USER(s)) goto illegal_op; ARCH(6); op1 = (insn & 0x1f); - if (op1 == (env->uncached_cpsr & CPSR_M)) { - addr = load_reg(s, 13); - } else { - addr = new_tmp(); - gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op1)); - } + addr = tcg_temp_new_i32(); + tmp = tcg_const_i32(op1); + gen_helper_get_r13_banked(addr, cpu_env, tmp); + tcg_temp_free_i32(tmp); i = (insn >> 23) & 3; switch (i) { case 0: offset = -4; break; /* DA */ - case 1: offset = -8; break; /* DB */ - case 2: offset = 0; break; /* IA */ + case 1: offset = 0; break; /* IA */ + case 2: offset = -8; break; /* DB */ case 3: offset = 4; break; /* IB */ default: abort(); } @@ -5822,32 +6532,31 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i32(addr, addr, offset); tmp = load_reg(s, 14); gen_st32(tmp, addr, 0); - tmp = new_tmp(); - gen_helper_cpsr_read(tmp); + tmp = load_cpu_field(spsr); tcg_gen_addi_i32(addr, addr, 4); gen_st32(tmp, addr, 0); if (insn & (1 << 21)) { /* Base writeback. 
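
The corrected SRS offset table above (and the matching writeback table just below) encodes the four P/U addressing modes for the two-word transfer; the old tables had the IA and DB rows swapped. A sketch of the arithmetic, with hypothetical names not taken from the patch:

    #include <stdint.h>

    /* P/U field ((insn >> 23) & 3): 0 = DA, 1 = IA, 2 = DB, 3 = IB. */
    static const int32_t srs_first[4] = { -4, 0, -8, 4 };  /* before word 1 */
    static const int32_t srs_wb[4]    = { -8, 4, -4, 0 };  /* extra writeback */

    /* Addresses used by the two-word SRS store and the final base value;
     * the net writeback always works out to base plus or minus 8. */
    static void srs_model(uint32_t base, int pu,
                          uint32_t *a_r14, uint32_t *a_spsr, uint32_t *wb)
    {
        uint32_t addr = base + (uint32_t)srs_first[pu];
        *a_r14  = addr;        /* R14 stored here */
        *a_spsr = addr + 4;    /* SPSR stored here */
        *wb     = addr + 4 + (uint32_t)srs_wb[pu];
    }

The same tables govern the RFE loads in the next hunk.
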
*/ switch (i) { case 0: offset = -8; break; - case 1: offset = -4; break; - case 2: offset = 4; break; + case 1: offset = 4; break; + case 2: offset = -4; break; case 3: offset = 0; break; default: abort(); } if (offset) - tcg_gen_addi_i32(addr, tmp, offset); - if (op1 == (env->uncached_cpsr & CPSR_M)) { - gen_movl_reg_T1(s, 13); - } else { - gen_helper_set_r13_banked(cpu_env, tcg_const_i32(op1), cpu_T[1]); - } + tcg_gen_addi_i32(addr, addr, offset); + tmp = tcg_const_i32(op1); + gen_helper_set_r13_banked(cpu_env, tmp, addr); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } - } else if ((insn & 0x0e5fffe0) == 0x081d0a00) { + return; + } else if ((insn & 0x0e50ffe0) == 0x08100a00) { /* rfe */ - uint32_t offset; + int32_t offset; if (IS_USER(s)) goto illegal_op; ARCH(6); @@ -5856,8 +6565,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) i = (insn >> 23) & 3; switch (i) { case 0: offset = -4; break; /* DA */ - case 1: offset = -8; break; /* DB */ - case 2: offset = 0; break; /* IA */ + case 1: offset = 0; break; /* IA */ + case 2: offset = -8; break; /* DB */ case 3: offset = 4; break; /* IB */ default: abort(); } @@ -5871,8 +6580,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* Base writeback. */ switch (i) { case 0: offset = -8; break; - case 1: offset = -4; break; - case 2: offset = 4; break; + case 1: offset = 4; break; + case 2: offset = -4; break; case 3: offset = 0; break; default: abort(); } @@ -5880,15 +6589,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i32(addr, addr, offset); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } gen_rfe(s, tmp, tmp2); + return; } else if ((insn & 0x0e000000) == 0x0a000000) { /* branch link and change to thumb (blx <offset>) */ int32_t offset; val = (uint32_t)s->pc; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_reg(s, 14, tmp); /* Sign-extend the 24-bit offset */ @@ -5897,6 +6607,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) val += (offset << 2) | ((insn >> 23) & 2) | 1; /* pipeline offset */ val += 4; + /* protected by ARCH(5); above, near the start of uncond block */ gen_bx_im(s, val); return; } else if ((insn & 0x0e000f00) == 0x0c000100) { @@ -5908,8 +6619,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } } else if ((insn & 0x0fe00000) == 0x0c400000) { /* Coprocessor double register transfer. */ + ARCH(5TE); } else if ((insn & 0x0f000010) == 0x0e000010) { /* Additional coprocessor register transfer. 
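
The immediate BLX target computed in this hunk combines a sign-extended 24-bit word offset, the H bit (bit 24) contributing 2, the Thumb bit, and the 8-byte pipeline offset. Roughly, as an illustrative standalone function:

    #include <stdint.h>

    /* Target of the unconditional ARM "blx <offset>" form: insn_addr is
     * the address of the instruction itself (ARM PC reads as +8). */
    static uint32_t arm_blx_imm_target(uint32_t insn_addr, uint32_t insn)
    {
        int32_t offset = (int32_t)(insn << 8) >> 8;   /* sign-extend 24 bits */
        return insn_addr + 8 + (((uint32_t)offset << 2) |
                                ((insn >> 23) & 2) | 1);
    }

Bit 0 of the result is set so that gen_bx_im switches the CPU into Thumb state.
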
*/ + if (!disas_coproc_insn(env, s, insn)) { + return; + } } else if ((insn & 0x0ff10020) == 0x01000000) { uint32_t mask; uint32_t val; @@ -5932,8 +6647,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) val |= (insn & 0x1f); } if (mask) { - gen_op_movl_T0_im(val); - gen_set_psr_T0(s, mask, 0); + gen_set_psr_im(s, mask, 0, val); } return; } @@ -5955,7 +6669,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) val = ((insn >> 4) & 0xf000) | (insn & 0xfff); if ((insn & (1 << 22)) == 0) { /* MOVW */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); } else { /* MOVT */ @@ -5975,9 +6689,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) shift = ((insn >> 8) & 0xf) * 2; if (shift) val = (val >> shift) | (val << (32 - shift)); - gen_op_movl_T0_im(val); i = ((insn & (1 << 22)) != 0); - if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i)) + if (gen_set_psr_im(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, val)) goto illegal_op; } } @@ -5991,9 +6704,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 0x0: /* move program status register */ if (op1 & 1) { /* PSR = reg */ - gen_movl_T0_reg(s, rm); + tmp = load_reg(s, rm); i = ((op1 & 2) != 0); - if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i)) + if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp)) goto illegal_op; } else { /* reg = PSR */ @@ -6003,7 +6716,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; tmp = load_cpu_field(spsr); } else { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_cpsr_read(tmp); } store_reg(s, rd, tmp); @@ -6012,10 +6725,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 0x1: if (op1 == 1) { /* branch/exchange thumb (bx). 
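
The MSR-immediate path above uses the standard ARM "rotated immediate": an 8-bit constant rotated right by twice the 4-bit rotate field. A self-contained sketch (the rot == 0 guard also avoids the undefined 32-bit shift):

    #include <stdint.h>

    static uint32_t arm_expand_imm(uint32_t insn)
    {
        uint32_t val = insn & 0xff;
        unsigned rot = ((insn >> 8) & 0xf) * 2;
        if (rot) {
            val = (val >> rot) | (val << (32 - rot));
        }
        return val;
    }
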
*/ + ARCH(4T); tmp = load_reg(s, rm); gen_bx(s, tmp); } else if (op1 == 3) { /* clz */ + ARCH(5); rd = (insn >> 12) & 0xf; tmp = load_reg(s, rm); gen_helper_clz(tmp, tmp); @@ -6038,14 +6753,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (op1 != 1) goto illegal_op; + ARCH(5); /* branch link/exchange thumb (blx) */ tmp = load_reg(s, rm); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc); store_reg(s, 14, tmp2); gen_bx(s, tmp); break; case 0x5: /* saturating add/subtract */ + ARCH(5TE); rd = (insn >> 12) & 0xf; rn = (insn >> 16) & 0xf; tmp = load_reg(s, rm); @@ -6056,19 +6773,29 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_helper_sub_saturate(tmp, tmp, tmp2); else gen_helper_add_saturate(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; - case 7: /* bkpt */ - gen_set_condexec(s); - gen_set_pc_im(s->pc - 4); - gen_exception(EXCP_BKPT); - s->is_jmp = DISAS_JUMP; + case 7: + if (op1 == 1) { + /* bkpt */ + ARCH(5); + gen_exception_insn(s, 4, EXCP_BKPT); + } else if (op1 == 3) { + /* smi/smc */ + if (!(env->cp15.c0_c2[4] & 0xf000) || IS_USER(s)) { + goto illegal_op; + } + gen_smc(env, s); + } else { + goto illegal_op; + } break; case 0x8: /* signed multiply */ case 0xa: case 0xc: case 0xe: + ARCH(5TE); rs = (insn >> 8) & 0xf; rn = (insn >> 12) & 0xf; rd = (insn >> 16) & 0xf; @@ -6082,12 +6809,13 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_sxth(tmp2); tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); if ((sh & 2) == 0) { tmp2 = load_reg(s, rn); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); } else { @@ -6095,18 +6823,19 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rm); tmp2 = load_reg(s, rs); gen_mulxy(tmp, tmp2, sh & 2, sh & 4); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (op1 == 2) { tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rn, rd); gen_storeq_reg(s, rn, rd, tmp64); + tcg_temp_free_i64(tmp64); } else { if (op1 == 0) { tmp2 = load_reg(s, rn); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); } @@ -6132,7 +6861,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (shift) { val = (val >> shift) | (val << (32 - shift)); } - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, val); if (logic_cc && shift) { gen_set_CF_bit31(tmp2); @@ -6235,26 +6964,26 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_and_i32(tmp, tmp, tmp2); gen_logic_CC(tmp); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x09: if (set_cc) { tcg_gen_xor_i32(tmp, tmp, tmp2); gen_logic_CC(tmp); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0a: if (set_cc) { gen_helper_sub_cc(tmp, tmp, tmp2); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0b: if (set_cc) { gen_helper_add_cc(tmp, tmp, tmp2); } - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; case 0x0c: tcg_gen_or_i32(tmp, tmp, tmp2); @@ -6278,7 +7007,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } break; case 0x0e: - tcg_gen_bic_i32(tmp, tmp, tmp2); + tcg_gen_andc_i32(tmp, tmp, tmp2); if (logic_cc) { gen_logic_CC(tmp); } @@ -6294,7 +7023,7 @@ static void disas_arm_insn(CPUState * 
env, DisasContext *s) break; } if (op1 != 0x0f && op1 != 0x0d) { - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } else { /* other instructions */ @@ -6317,42 +7046,55 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rs); tmp2 = load_reg(s, rm); tcg_gen_mul_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (insn & (1 << 22)) { /* Subtract (mls) */ ARCH(6T2); tmp2 = load_reg(s, rn); tcg_gen_sub_i32(tmp, tmp2, tmp); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } else if (insn & (1 << 21)) { /* Add */ tmp2 = load_reg(s, rn); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } if (insn & (1 << 20)) gen_logic_CC(tmp); store_reg(s, rd, tmp); break; - default: - /* 64 bit mul */ + case 4: + /* 64 bit mul double accumulate (UMAAL) */ + ARCH(6); tmp = load_reg(s, rs); tmp2 = load_reg(s, rm); - if (insn & (1 << 22)) + tmp64 = gen_mulu_i64_i32(tmp, tmp2); + gen_addq_lo(s, tmp64, rn); + gen_addq_lo(s, tmp64, rd); + gen_storeq_reg(s, rn, rd, tmp64); + tcg_temp_free_i64(tmp64); + break; + case 8: case 9: case 10: case 11: + case 12: case 13: case 14: case 15: + /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */ + tmp = load_reg(s, rs); + tmp2 = load_reg(s, rm); + if (insn & (1 << 22)) { tmp64 = gen_muls_i64_i32(tmp, tmp2); - else + } else { tmp64 = gen_mulu_i64_i32(tmp, tmp2); - if (insn & (1 << 21)) /* mult accumulate */ + } + if (insn & (1 << 21)) { /* mult accumulate */ gen_addq(s, tmp64, rn, rd); - if (!(insn & (1 << 23))) { /* double accumulate */ - ARCH(6); - gen_addq_lo(s, tmp64, rn); - gen_addq_lo(s, tmp64, rd); } - if (insn & (1 << 20)) + if (insn & (1 << 20)) { gen_logicq_cc(tmp64); + } gen_storeq_reg(s, rn, rd, tmp64); + tcg_temp_free_i64(tmp64); break; + default: + goto illegal_op; } } else { rn = (insn >> 16) & 0xf; @@ -6364,60 +7106,45 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) ARCH(6K); else ARCH(6); - gen_movl_T1_reg(s, rn); - addr = cpu_T[1]; + addr = tcg_temp_local_new_i32(); + load_reg_var(s, addr, rn); if (insn & (1 << 20)) { - gen_helper_mark_exclusive(cpu_env, cpu_T[1]); switch (op1) { case 0: /* ldrex */ - tmp = gen_ld32(addr, IS_USER(s)); + gen_load_exclusive(s, rd, 15, addr, 2); break; case 1: /* ldrexd */ - tmp = gen_ld32(addr, IS_USER(s)); - store_reg(s, rd, tmp); - tcg_gen_addi_i32(addr, addr, 4); - tmp = gen_ld32(addr, IS_USER(s)); - rd++; + gen_load_exclusive(s, rd, rd + 1, addr, 3); break; case 2: /* ldrexb */ - tmp = gen_ld8u(addr, IS_USER(s)); + gen_load_exclusive(s, rd, 15, addr, 0); break; case 3: /* ldrexh */ - tmp = gen_ld16u(addr, IS_USER(s)); + gen_load_exclusive(s, rd, 15, addr, 1); break; default: abort(); } - store_reg(s, rd, tmp); } else { - int label = gen_new_label(); rm = insn & 0xf; - gen_helper_test_exclusive(cpu_T[0], cpu_env, addr); - tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], - 0, label); - tmp = load_reg(s,rm); switch (op1) { case 0: /* strex */ - gen_st32(tmp, addr, IS_USER(s)); + gen_store_exclusive(s, rd, rm, 15, addr, 2); break; case 1: /* strexd */ - gen_st32(tmp, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, 4); - tmp = load_reg(s, rm + 1); - gen_st32(tmp, addr, IS_USER(s)); + gen_store_exclusive(s, rd, rm, rm + 1, addr, 3); break; case 2: /* strexb */ - gen_st8(tmp, addr, IS_USER(s)); + gen_store_exclusive(s, rd, rm, 15, addr, 0); break; case 3: /* strexh */ - gen_st16(tmp, addr, IS_USER(s)); + gen_store_exclusive(s, rd, rm, 15, addr, 1); break; default: abort(); } - gen_set_label(label); - gen_movl_reg_T0(s, rd); } + tcg_temp_free(addr); } else { /* 
SWP instruction */ rm = (insn) & 0xf; @@ -6434,7 +7161,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp2 = gen_ld32(addr, IS_USER(s)); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); store_reg(s, rd, tmp2); } } @@ -6464,6 +7191,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } load = 1; } else if (sh & 2) { + ARCH(5TE); /* doubleword */ if (sh & 1) { /* store */ @@ -6501,7 +7229,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_addi_i32(addr, addr, address_offset); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } if (load) { /* Complete the load. */ @@ -6530,7 +7258,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if ((op1 & 3) == 0 || sh == 5 || sh == 6) goto illegal_op; gen_arm_parallel_addsub(op1, sh, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 1: @@ -6554,7 +7282,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); } tcg_gen_or_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x00200020) == 0x00200000) { /* [us]sat */ @@ -6568,41 +7296,41 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tcg_gen_shli_i32(tmp, tmp, shift); } sh = (insn >> 16) & 0x1f; - if (sh != 0) { - if (insn & (1 << 22)) - gen_helper_usat(tmp, tmp, tcg_const_i32(sh)); - else - gen_helper_ssat(tmp, tmp, tcg_const_i32(sh)); - } + tmp2 = tcg_const_i32(sh); + if (insn & (1 << 22)) + gen_helper_usat(tmp, tmp, tmp2); + else + gen_helper_ssat(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x00300fe0) == 0x00200f20) { /* [us]sat16 */ tmp = load_reg(s, rm); sh = (insn >> 16) & 0x1f; - if (sh != 0) { - if (insn & (1 << 22)) - gen_helper_usat16(tmp, tmp, tcg_const_i32(sh)); - else - gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh)); - } + tmp2 = tcg_const_i32(sh); + if (insn & (1 << 22)) + gen_helper_usat16(tmp, tmp, tmp2); + else + gen_helper_ssat16(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x00700fe0) == 0x00000fa0) { /* Select bytes. */ tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); - tmp3 = new_tmp(); + tmp3 = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((insn & 0x000003e0) == 0x00000060) { tmp = load_reg(s, rm); shift = (insn >> 10) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. 
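
For the [us]sat rework below, note the behavioral fix as well as the temp handling: the helper is now called even when the sat field is zero, which matters because a zero field still saturates (USAT #0 clamps everything to 0; for SSAT the field encodes the width minus one). A rough model of the signed clamping such a helper performs, with illustrative names (the real helper lives in helper.c and is not shown in this hunk):

    #include <stdint.h>

    static int q_flag;   /* sticky Q flag, stands in for env->QF */

    /* Clamp val to the signed range -2^bits .. 2^bits - 1; the unsigned
     * variant clamps to 0 .. 2^bits - 1 analogously. */
    static int32_t ssat_model(int32_t val, unsigned bits)
    {
        int32_t top = val >> bits;           /* bits that must be all-sign */
        uint32_t mask = (1u << bits) - 1;
        if (top > 0) {
            q_flag = 1;
            return (int32_t)mask;            /* clamp high */
        }
        if (top < -1) {
            q_flag = 1;
            return (int32_t)~mask;           /* clamp low */
        }
        return val;
    }
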
*/ if (shift != 0) - tcg_gen_rori_i32(tmp, tmp, shift * 8); + tcg_gen_rotri_i32(tmp, tmp, shift * 8); op1 = (insn >> 20) & 7; switch (op1) { case 0: gen_sxtb16(tmp); break; @@ -6611,7 +7339,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 4: gen_uxtb16(tmp); break; case 6: gen_uxtb(tmp); break; case 7: gen_uxth(tmp); break; - default: goto illegal_op; + default: tcg_temp_free_i32(tmp); goto illegal_op; } if (rn != 15) { tmp2 = load_reg(s, rn); @@ -6619,7 +7347,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_add16(tmp, tmp2); } else { tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } store_reg(s, rd, tmp); @@ -6648,48 +7376,56 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rm); tmp2 = load_reg(s, rs); if (insn & (1 << 20)) { - /* Signed multiply most significant [accumulate]. */ + /* Signed multiply most significant [accumulate]. + (SMMUL, SMMLA, SMMLS) */ tmp64 = gen_muls_i64_i32(tmp, tmp2); - if (insn & (1 << 5)) - tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); - tcg_gen_shri_i64(tmp64, tmp64, 32); - tmp = new_tmp(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + if (rd != 15) { - tmp2 = load_reg(s, rd); + tmp = load_reg(s, rd); if (insn & (1 << 6)) { - tcg_gen_sub_i32(tmp, tmp, tmp2); + tmp64 = gen_subq_msw(tmp64, tmp); } else { - tcg_gen_add_i32(tmp, tmp, tmp2); + tmp64 = gen_addq_msw(tmp64, tmp); } - dead_tmp(tmp2); } + if (insn & (1 << 5)) { + tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); + } + tcg_gen_shri_i64(tmp64, tmp64, 32); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); store_reg(s, rn, tmp); } else { if (insn & (1 << 5)) gen_swap_half(tmp2); gen_smul_dual(tmp, tmp2); - /* This addition cannot overflow. */ if (insn & (1 << 6)) { + /* This subtraction cannot overflow. */ tcg_gen_sub_i32(tmp, tmp, tmp2); } else { - tcg_gen_add_i32(tmp, tmp, tmp2); + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a signed + * operation, in which case we must set the Q flag. 
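
The restructured SMMUL/SMMLA/SMMLS sequence below keeps the full 64-bit product and only then takes the high word, with optional rounding: adding 0x80000000 before the shift rounds to nearest instead of truncating. A minimal sketch of the arithmetic (illustrative, not the emitted TCG ops):

    #include <stdint.h>

    /* High-word signed multiply, optionally rounded (SMMUL/SMMULR). */
    static int32_t smmul(int32_t a, int32_t b, int round)
    {
        int64_t p = (int64_t)a * b;
        if (round) {
            p += 0x80000000LL;    /* bias so truncation rounds to nearest */
        }
        return (int32_t)(p >> 32);
    }

For SMMLA/SMMLS the accumulator is added to or subtracted from the 64-bit product before the same rounding and shift, which is exactly why the diff moves the accumulate ahead of the 0x80000000 bias.
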
+ */ + gen_helper_add_setq(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (insn & (1 << 22)) { /* smlald, smlsld */ tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rd, rn); gen_storeq_reg(s, rd, rn, tmp64); + tcg_temp_free_i64(tmp64); } else { /* smuad, smusd, smlad, smlsd */ if (rd != 15) { tmp2 = load_reg(s, rd); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rn, tmp); } @@ -6703,11 +7439,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) tmp = load_reg(s, rm); tmp2 = load_reg(s, rs); gen_helper_usad8(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rd != 15) { tmp2 = load_reg(s, rd); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rn, tmp); break; @@ -6718,7 +7454,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) i = (insn >> 16) & 0x1f; i = i + 1 - shift; if (rm == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rm); @@ -6726,7 +7462,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (i != 32) { tmp2 = load_reg(s, rd); gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } store_reg(s, rd, tmp); break; @@ -6792,14 +7528,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } else if (insn & (1 << 21)) { store_reg(s, rn, tmp2); } else { - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } if (insn & (1 << 20)) { /* Complete the load. */ - if (rd == 15) - gen_bx(s, tmp); - else - store_reg(s, rd, tmp); + store_reg_from_load(env, s, rd, tmp); } break; case 0x08: @@ -6819,6 +7552,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } rn = (insn >> 16) & 0xf; addr = load_reg(s, rn); + tmp3 = tcg_const_i32(4); /* compute total size */ loaded_base = 0; @@ -6832,7 +7566,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (insn & (1 << 23)) { if (insn & (1 << 24)) { /* pre increment */ - tcg_gen_addi_i32(addr, addr, 4); + tcg_gen_add_i32(addr, addr, tmp3); } else { /* post increment */ } @@ -6852,27 +7586,29 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (insn & (1 << 20)) { /* load */ tmp = gen_ld32(addr, IS_USER(s)); - if (i == 15) { - gen_bx(s, tmp); - } else if (user) { - gen_helper_set_user_reg(tcg_const_i32(i), tmp); - dead_tmp(tmp); + if (user) { + tmp2 = tcg_const_i32(i); + gen_helper_set_user_reg(tmp2, tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); } else if (i == rn) { loaded_var = tmp; loaded_base = 1; } else { - store_reg(s, i, tmp); + store_reg_from_load(env, s, i, tmp); } } else { /* store */ if (i == 15) { /* special case: r15 = PC + 8 */ val = (long)s->pc + 4; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); } else if (user) { - tmp = new_tmp(); - gen_helper_get_user_reg(tmp, tcg_const_i32(i)); + tmp = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(i); + gen_helper_get_user_reg(tmp, tmp2); + tcg_temp_free_i32(tmp2); } else { tmp = load_reg(s, i); } @@ -6881,7 +7617,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) j++; /* no need to add after the last transfer */ if (j != n) - tcg_gen_addi_i32(addr, addr, 4); + tcg_gen_add_i32(addr, addr, tmp3); } } if (insn & (1 << 21)) { @@ -6891,7 +7627,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* pre increment */ } else { /* post increment */ - tcg_gen_addi_i32(addr, addr, 
4); + tcg_gen_add_i32(addr, addr, tmp3); } } else { if (insn & (1 << 24)) { @@ -6905,8 +7641,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } + tcg_temp_free_i32(tmp3); if (loaded_base) { store_reg(s, rn, loaded_var); } @@ -6914,7 +7651,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) /* Restore CPSR from SPSR. */ tmp = load_cpu_field(spsr); gen_set_cpsr(tmp, 0xffffffff); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); s->is_jmp = DISAS_UPDATE; } } @@ -6923,10 +7660,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 0xb: { int32_t offset; + /* branch (and link) */ val = (int32_t)s->pc; if (insn & (1 << 24)) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, val); store_reg(s, 14, tmp); } @@ -6949,10 +7687,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) break; default: illegal_op: - gen_set_condexec(s); - gen_set_pc_im(s->pc - 4); - gen_exception(EXCP_UDEF); - s->is_jmp = DISAS_JUMP; + gen_exception_insn(s, 4, EXCP_UDEF); break; } } @@ -6972,70 +7707,69 @@ thumb2_logic_op(int op) Returns zero if the opcode is valid. */ static int -gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out) +gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCGv t0, TCGv t1) { int logic_cc; logic_cc = 0; switch (op) { case 0: /* and */ - gen_op_andl_T0_T1(); + tcg_gen_and_i32(t0, t0, t1); logic_cc = conds; break; case 1: /* bic */ - gen_op_bicl_T0_T1(); + tcg_gen_andc_i32(t0, t0, t1); logic_cc = conds; break; case 2: /* orr */ - gen_op_orl_T0_T1(); + tcg_gen_or_i32(t0, t0, t1); logic_cc = conds; break; case 3: /* orn */ - gen_op_notl_T1(); - gen_op_orl_T0_T1(); + tcg_gen_orc_i32(t0, t0, t1); logic_cc = conds; break; case 4: /* eor */ - gen_op_xorl_T0_T1(); + tcg_gen_xor_i32(t0, t0, t1); logic_cc = conds; break; case 8: /* add */ if (conds) - gen_op_addl_T0_T1_cc(); + gen_helper_add_cc(t0, t0, t1); else - gen_op_addl_T0_T1(); + tcg_gen_add_i32(t0, t0, t1); break; case 10: /* adc */ if (conds) - gen_op_adcl_T0_T1_cc(); + gen_helper_adc_cc(t0, t0, t1); else - gen_adc_T0_T1(); + gen_adc(t0, t1); break; case 11: /* sbc */ if (conds) - gen_op_sbcl_T0_T1_cc(); + gen_helper_sbc_cc(t0, t0, t1); else - gen_sbc_T0_T1(); + gen_sub_carry(t0, t0, t1); break; case 13: /* sub */ if (conds) - gen_op_subl_T0_T1_cc(); + gen_helper_sub_cc(t0, t0, t1); else - gen_op_subl_T0_T1(); + tcg_gen_sub_i32(t0, t0, t1); break; case 14: /* rsb */ if (conds) - gen_op_rsbl_T0_T1_cc(); + gen_helper_sub_cc(t0, t1, t0); else - gen_op_rsbl_T0_T1(); + tcg_gen_sub_i32(t0, t1, t0); break; default: /* 5, 6, 7, 9, 12, 15. */ return 1; } if (logic_cc) { - gen_op_logic_T0_cc(); + gen_logic_CC(t0); if (shifter_out) - gen_set_CF_bit31(cpu_T[1]); + gen_set_CF_bit31(t1); } return 0; } @@ -7062,13 +7796,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) 16-bit instructions to get correct prefetch abort behavior. */ insn = insn_hw1; if ((insn & (1 << 12)) == 0) { + ARCH(5); /* Second half of blx. 
*/ offset = ((insn & 0x7ff) << 1); tmp = load_reg(s, 14); tcg_gen_addi_i32(tmp, tmp, offset); tcg_gen_andi_i32(tmp, tmp, 0xfffffffc); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc | 1); store_reg(s, 14, tmp2); gen_bx(s, tmp); @@ -7080,7 +7815,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, 14); tcg_gen_addi_i32(tmp, tmp, offset); - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, s->pc | 1); store_reg(s, 14, tmp2); gen_bx(s, tmp); @@ -7091,8 +7826,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) 16-bit instructions in case the second half causes an prefetch abort. */ offset = ((int32_t)insn << 21) >> 9; - gen_op_movl_T0_im(s->pc + 2 + offset); - gen_movl_reg_T0(s, 14); + tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset); return 0; } /* Fall through to 32-bit decode. */ @@ -7100,10 +7834,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) insn = lduw_code(s->pc); ANDROID_TRACE_START_THUMB(); - - insn |= (uint32_t)insn_hw1 << 16; - s->pc += 2; + insn |= (uint32_t)insn_hw1 << 16; if ((insn & 0xf800e800) != 0xf000e800) { ARCH(6T2); @@ -7123,7 +7855,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (insn & 0x01200000) { /* Load/store doubleword. */ if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, s->pc & ~3); } else { addr = load_reg(s, rn); @@ -7157,30 +7889,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i32(addr, addr, offset - 4); store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } else if ((insn & (1 << 23)) == 0) { /* Load/store exclusive word. */ - gen_movl_T1_reg(s, rn); - addr = cpu_T[1]; + addr = tcg_temp_local_new(); + load_reg_var(s, addr, rn); + tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2); if (insn & (1 << 20)) { - gen_helper_mark_exclusive(cpu_env, cpu_T[1]); - tmp = gen_ld32(addr, IS_USER(s)); - store_reg(s, rd, tmp); + gen_load_exclusive(s, rs, 15, addr, 2); } else { - int label = gen_new_label(); - gen_helper_test_exclusive(cpu_T[0], cpu_env, addr); - tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], - 0, label); - tmp = load_reg(s, rs); - gen_st32(tmp, cpu_T[1], IS_USER(s)); - gen_set_label(label); - gen_movl_reg_T0(s, rd); + gen_store_exclusive(s, rd, rs, 15, addr, 2); } + tcg_temp_free(addr); } else if ((insn & (1 << 6)) == 0) { /* Table Branch. */ if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, s->pc); } else { addr = load_reg(s, rn); @@ -7190,70 +7915,31 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (insn & (1 << 4)) { /* tbh */ tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = gen_ld16u(addr, IS_USER(s)); } else { /* tbb */ - dead_tmp(tmp); + tcg_temp_free_i32(tmp); tmp = gen_ld8u(addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); tcg_gen_shli_i32(tmp, tmp, 1); tcg_gen_addi_i32(tmp, tmp, s->pc); store_reg(s, 15, tmp); } else { /* Load/store exclusive byte/halfword/doubleword. */ - /* ??? These are not really atomic. However we know - we never have multiple CPUs running in parallel, - so it is good enough. */ + ARCH(7); op = (insn >> 4) & 0x3; - /* Must use a global reg for the address because we have - a conditional branch in the store instruction. 
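
The split BL/BLX handling above assembles the branch target from two 16-bit halves: the prefix parks the sign-extended high part of the offset in LR (relative to the prefix address plus 4), and the suffix adds the low part, aligning the result for BLX. As a sketch (hypothetical helper, PC semantics as in the decoder):

    #include <stdint.h>

    static uint32_t thumb_bl_target(uint32_t prefix_addr,
                                    uint16_t hw1, uint16_t hw2, int is_blx)
    {
        int32_t hi = (int32_t)((uint32_t)hw1 << 21) >> 9;  /* offset11 << 12 */
        uint32_t lr = prefix_addr + 4 + (uint32_t)hi;      /* set by prefix */
        uint32_t target = lr + (uint32_t)((hw2 & 0x7ff) << 1);
        if (is_blx) {
            target &= ~3u;        /* BLX enters ARM state, word-aligned */
        }
        return target;
    }

Decoding the prefix separately is what gives correct behavior when the second halfword takes a prefetch abort.
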
*/ - gen_movl_T1_reg(s, rn); - addr = cpu_T[1]; + if (op == 2) { + goto illegal_op; + } + addr = tcg_temp_local_new(); + load_reg_var(s, addr, rn); if (insn & (1 << 20)) { - gen_helper_mark_exclusive(cpu_env, addr); - switch (op) { - case 0: - tmp = gen_ld8u(addr, IS_USER(s)); - break; - case 1: - tmp = gen_ld16u(addr, IS_USER(s)); - break; - case 3: - tmp = gen_ld32(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, 4); - tmp2 = gen_ld32(addr, IS_USER(s)); - store_reg(s, rd, tmp2); - break; - default: - goto illegal_op; - } - store_reg(s, rs, tmp); + gen_load_exclusive(s, rs, rd, addr, op); } else { - int label = gen_new_label(); - /* Must use a global that is not killed by the branch. */ - gen_helper_test_exclusive(cpu_T[0], cpu_env, addr); - tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label); - tmp = load_reg(s, rs); - switch (op) { - case 0: - gen_st8(tmp, addr, IS_USER(s)); - break; - case 1: - gen_st16(tmp, addr, IS_USER(s)); - break; - case 3: - gen_st32(tmp, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, 4); - tmp = load_reg(s, rd); - gen_st32(tmp, addr, IS_USER(s)); - break; - default: - goto illegal_op; - } - gen_set_label(label); - gen_movl_reg_T0(s, rm); + gen_store_exclusive(s, rm, rs, rd, addr, op); } + tcg_temp_free(addr); } } else { /* Load/store multiple, RFE, SRS. */ @@ -7279,25 +7965,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } gen_rfe(s, tmp, tmp2); } else { /* srs */ op = (insn & 0x1f); - if (op == (env->uncached_cpsr & CPSR_M)) { - addr = load_reg(s, 13); - } else { - addr = new_tmp(); - gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op)); - } + addr = tcg_temp_new_i32(); + tmp = tcg_const_i32(op); + gen_helper_get_r13_banked(addr, cpu_env, tmp); + tcg_temp_free_i32(tmp); if ((insn & (1 << 24)) == 0) { tcg_gen_addi_i32(addr, addr, -8); } tmp = load_reg(s, 14); gen_st32(tmp, addr, 0); tcg_gen_addi_i32(addr, addr, 4); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); gen_helper_cpsr_read(tmp); gen_st32(tmp, addr, 0); if (insn & (1 << 21)) { @@ -7306,18 +7990,16 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else { tcg_gen_addi_i32(addr, addr, 4); } - if (op == (env->uncached_cpsr & CPSR_M)) { - store_reg(s, 13, addr); - } else { - gen_helper_set_r13_banked(cpu_env, - tcg_const_i32(op), addr); - } + tmp = tcg_const_i32(op); + gen_helper_set_r13_banked(cpu_env, tmp, addr); + tcg_temp_free_i32(tmp); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } } else { - int i; + int i, loaded_base = 0; + TCGv loaded_var; /* Load/store multiple. 
*/ addr = load_reg(s, rn); offset = 0; @@ -7329,6 +8011,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_addi_i32(addr, addr, -offset); } + TCGV_UNUSED(loaded_var); + tmp2 = tcg_const_i32(4); for (i = 0; i < 16; i++) { if ((insn & (1 << i)) == 0) continue; @@ -7337,6 +8021,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = gen_ld32(addr, IS_USER(s)); if (i == 15) { gen_bx(s, tmp); + } else if (i == rn) { + loaded_var = tmp; + loaded_base = 1; } else { store_reg(s, i, tmp); } @@ -7345,8 +8032,12 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, i); gen_st32(tmp, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, 4); + tcg_gen_add_i32(addr, addr, tmp2); } + if (loaded_base) { + store_reg(s, rn, loaded_var); + } + tcg_temp_free_i32(tmp2); if (insn & (1 << 21)) { /* Base register writeback. */ if (insn & (1 << 24)) { @@ -7357,27 +8048,60 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) goto illegal_op; store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } } break; - case 5: /* Data processing register constant shift. */ - if (rn == 15) - gen_op_movl_T0_im(0); - else - gen_movl_T0_reg(s, rn); - gen_movl_T1_reg(s, rm); + case 5: + op = (insn >> 21) & 0xf; - shiftop = (insn >> 4) & 3; - shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); - conds = (insn & (1 << 20)) != 0; - logic_cc = (conds && thumb2_logic_op(op)); - gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc); - if (gen_thumb2_data_op(s, op, conds, 0)) - goto illegal_op; - if (rd != 15) - gen_movl_reg_T0(s, rd); + if (op == 6) { + /* Halfword pack. */ + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3); + if (insn & (1 << 5)) { + /* pkhtb */ + if (shift == 0) + shift = 31; + tcg_gen_sari_i32(tmp2, tmp2, shift); + tcg_gen_andi_i32(tmp, tmp, 0xffff0000); + tcg_gen_ext16u_i32(tmp2, tmp2); + } else { + /* pkhbt */ + if (shift) + tcg_gen_shli_i32(tmp2, tmp2, shift); + tcg_gen_ext16u_i32(tmp, tmp); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + } + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else { + /* Data processing register constant shift. */ + if (rn == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } + tmp2 = load_reg(s, rm); + + shiftop = (insn >> 4) & 3; + shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); + conds = (insn & (1 << 20)) != 0; + logic_cc = (conds && thumb2_logic_op(op)); + gen_arm_shift_im(tmp2, shiftop, shift, logic_cc); + if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) + goto illegal_op; + tcg_temp_free_i32(tmp2); + if (rd != 15) { + store_reg(s, rd, tmp); + } else { + tcg_temp_free_i32(tmp); + } + } break; case 13: /* Misc data processing. */ op = ((insn >> 22) & 6) | ((insn >> 7) & 1); @@ -7399,10 +8123,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) case 1: /* Sign/zero extend. */ tmp = load_reg(s, rm); shift = (insn >> 4) & 3; - /* ??? In many cases it's not neccessary to do a + /* ??? In many cases it's not necessary to do a rotate, a shift is sufficient. 
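
The new halfword-pack case below (PKHBT/PKHTB) combines one half from each operand; PKHTB's shift encoding of 0 means an arithmetic shift right by 32, which the code implements as a shift by 31 (equivalent for the 16 bits that are kept). A scalar model (names illustrative):

    #include <stdint.h>

    /* PKHBT: bottom half of rn, top half of rm << shift. */
    static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned shift)
    {
        return (rn & 0xffffu) | ((rm << shift) & 0xffff0000u);
    }

    /* PKHTB: top half of rn, bottom half of rm ASR shift. */
    static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned shift)
    {
        int32_t t = (int32_t)rm >> (shift == 0 ? 31 : shift);
        return (rn & 0xffff0000u) | ((uint32_t)t & 0xffffu);
    }
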
*/ if (shift != 0) - tcg_gen_rori_i32(tmp, tmp, shift * 8); + tcg_gen_rotri_i32(tmp, tmp, shift * 8); op = (insn >> 20) & 7; switch (op) { case 0: gen_sxth(tmp); break; @@ -7419,7 +8143,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_add16(tmp, tmp2); } else { tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } } store_reg(s, rd, tmp); @@ -7432,7 +8156,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); gen_thumb2_parallel_addsub(op, shift, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); break; case 3: /* Other data processing. */ @@ -7441,13 +8165,13 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) /* Saturating add/subtract. */ tmp = load_reg(s, rn); tmp2 = load_reg(s, rm); - if (op & 2) - gen_helper_double_saturate(tmp, tmp); if (op & 1) + gen_helper_double_saturate(tmp, tmp); + if (op & 2) gen_helper_sub_saturate(tmp, tmp2, tmp); else gen_helper_add_saturate(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } else { tmp = load_reg(s, rn); switch (op) { @@ -7465,11 +8189,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) break; case 0x10: /* sel */ tmp2 = load_reg(s, rm); - tmp3 = new_tmp(); + tmp3 = tcg_temp_new_i32(); tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); - dead_tmp(tmp3); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); break; case 0x18: /* clz */ gen_helper_clz(tmp, tmp); @@ -7487,23 +8211,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) switch ((insn >> 20) & 7) { case 0: /* 32 x 32 -> 32 */ tcg_gen_mul_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); if (op) tcg_gen_sub_i32(tmp, tmp2, tmp); else tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 1: /* 16 x 16 -> 32 */ gen_mulxy(tmp, tmp2, op & 2, op & 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 2: /* Dual multiply add. */ @@ -7511,18 +8235,22 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (op) gen_swap_half(tmp2); gen_smul_dual(tmp, tmp2); - /* This addition cannot overflow. */ if (insn & (1 << 22)) { + /* This subtraction cannot overflow. */ tcg_gen_sub_i32(tmp, tmp, tmp2); } else { - tcg_gen_add_i32(tmp, tmp, tmp2); + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a signed + * operation, in which case we must set the Q flag. 
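
The saturating add/subtract fix below swaps which op bit doubles and which selects subtraction: bit 0 now doubles the first operand (saturating), bit 1 selects subtract, giving QADD/QDADD/QSUB/QDSUB their architected meanings. A sketch of the corrected ordering (illustrative names; q_flag stands in for env->QF):

    #include <stdint.h>

    static int q_flag;

    static int32_t sat32(int64_t v)
    {
        if (v > INT32_MAX) { q_flag = 1; return INT32_MAX; }
        if (v < INT32_MIN) { q_flag = 1; return INT32_MIN; }
        return (int32_t)v;
    }

    /* bit 0 of op: saturating doubling of rn; bit 1: subtract. */
    static int32_t qdop(int32_t rn, int32_t rm, int op)
    {
        int32_t n = (op & 1) ? sat32(2 * (int64_t)rn) : rn;
        return (op & 2) ? sat32((int64_t)rm - n) : sat32((int64_t)n + rm);
    }
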
+ */ + gen_helper_add_setq(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 3: /* 32 * 16 -> 32msb */ @@ -7532,41 +8260,41 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_sxth(tmp2); tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); if (rs != 15) { tmp2 = load_reg(s, rs); gen_helper_add_setq(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; - case 5: case 6: /* 32 * 32 -> 32msb */ - gen_imull(tmp, tmp2); - if (insn & (1 << 5)) { - gen_roundqd(tmp, tmp2); - dead_tmp(tmp2); - } else { - dead_tmp(tmp); - tmp = tmp2; - } + case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */ + tmp64 = gen_muls_i64_i32(tmp, tmp2); if (rs != 15) { - tmp2 = load_reg(s, rs); - if (insn & (1 << 21)) { - tcg_gen_add_i32(tmp, tmp, tmp2); + tmp = load_reg(s, rs); + if (insn & (1 << 20)) { + tmp64 = gen_addq_msw(tmp64, tmp); } else { - tcg_gen_sub_i32(tmp, tmp2, tmp); + tmp64 = gen_subq_msw(tmp64, tmp); } - dead_tmp(tmp2); } + if (insn & (1 << 4)) { + tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); + } + tcg_gen_shri_i64(tmp64, tmp64, 32); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); break; case 7: /* Unsigned sum of absolute differences. */ gen_helper_usad8(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); if (rs != 15) { tmp2 = load_reg(s, rs); tcg_gen_add_i32(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; } @@ -7584,7 +8312,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_helper_udiv(tmp, tmp, tmp2); else gen_helper_sdiv(tmp, tmp, tmp2); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); store_reg(s, rd, tmp); } else if ((op & 0xe) == 0xc) { /* Dual multiply accumulate long. */ @@ -7596,13 +8324,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else { tcg_gen_add_i32(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); /* BUGFIX */ tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); gen_addq(s, tmp64, rs, rd); gen_storeq_reg(s, rs, rd, tmp64); + tcg_temp_free_i64(tmp64); } else { if (op & 0x20) { /* Unsigned 64-bit multiply */ @@ -7611,10 +8340,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (op & 8) { /* smlalxy */ gen_mulxy(tmp, tmp2, op & 2, op & 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); tmp64 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(tmp64, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); } else { /* Signed 64-bit multiply */ tmp64 = gen_muls_i64_i32(tmp, tmp2); @@ -7629,6 +8358,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_addq(s, tmp64, rs, rd); } gen_storeq_reg(s, rs, rd, tmp64); + tcg_temp_free_i64(tmp64); } break; } @@ -7637,7 +8367,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) /* Coprocessor. */ if (((insn >> 24) & 3) == 3) { /* Translate into the equivalent ARM encoding. 
*/ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4); + insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); if (disas_neon_data_insn(env, s, insn)) goto illegal_op; } else { @@ -7664,8 +8394,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (insn & (1 << 14)) { /* Branch and link. */ - gen_op_movl_T1_im(s->pc | 1); - gen_movl_reg_T1(s, 14); + tcg_gen_movi_i32(cpu_R[14], s->pc | 1); } offset += s->pc; @@ -7675,6 +8404,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) } else { /* blx */ offset &= ~(uint32_t)2; + /* thumb2 bx, no need to check */ gen_bx_im(s, offset); } } else if (((insn >> 23) & 7) == 7) { @@ -7683,8 +8413,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) goto illegal_op; if (insn & (1 << 26)) { - /* Secure monitor call (v6Z) */ - goto illegal_op; /* not implemented. */ + /* Secure monitor call / smc (v6Z) */ + if (!(env->cp15.c0_c2[4] & 0xf000) || IS_USER(s)) { + goto illegal_op; + } + gen_smc(env, s); } else { op = (insn >> 20) & 7; switch (op) { @@ -7693,6 +8426,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tmp = load_reg(s, rn); addr = tcg_const_i32(insn & 0xff); gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); gen_lookup_tb(s); break; } @@ -7700,10 +8435,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) case 1: /* msr spsr. */ if (IS_M(env)) goto illegal_op; - gen_movl_T0_reg(s, rn); - if (gen_set_psr_T0(s, + tmp = load_reg(s, rn); + if (gen_set_psr(s, msr_mask(env, s, (insn >> 8) & 0xf, op == 1), - op == 1)) + op == 1, tmp)) goto illegal_op; break; case 2: /* cps, nop-hint. */ @@ -7730,21 +8465,20 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm |= (insn & 0x1f); } if (offset) { - gen_op_movl_T0_im(imm); - gen_set_psr_T0(s, offset, 0); + gen_set_psr_im(s, offset, 0, imm); } break; case 3: /* Special control operations. */ + ARCH(7); op = (insn >> 4) & 0xf; switch (op) { case 2: /* clrex */ - gen_helper_clrex(cpu_env); + gen_clrex(s); break; case 4: /* dsb */ case 5: /* dmb */ case 6: /* isb */ /* These execute as NOPs. */ - ARCH(7); break; default: goto illegal_op; @@ -7756,13 +8490,22 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_bx(s, tmp); break; case 5: /* Exception return. */ - /* Unpredictable in user mode. */ - goto illegal_op; + if (IS_USER(s)) { + goto illegal_op; + } + if (rn != 14 || rd != 15) { + goto illegal_op; + } + tmp = load_reg(s, rn); + tcg_gen_subi_i32(tmp, tmp, insn & 0xff); + gen_exception_return(s, tmp); + break; case 6: /* mrs cpsr. 
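A hedged note on the SMC gate a few lines up: env->cp15.c0_c2[4] appears to hold ID_ISAR4, whose SMC field sits in bits [15:12], which is what the 0xf000 mask tests; SMC additionally UNDEFs in user mode. A sketch of the same predicate (the function name and sample value are illustrative):

#include <stdint.h>

static int smc_can_decode(uint32_t id_isar4, int is_user)
{
    /* Mirrors: if (!(env->cp15.c0_c2[4] & 0xf000) || IS_USER(s)) UNDEF */
    return ((id_isar4 & 0xf000u) != 0) && !is_user;
}

int main(void)
{
    return smc_can_decode(0x00011142u, 0) ? 0 : 1; /* SMC field nonzero */
}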
*/ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); if (IS_M(env)) { addr = tcg_const_i32(insn & 0xff); gen_helper_v7m_mrs(tmp, cpu_env, addr); + tcg_temp_free_i32(addr); } else { gen_helper_cpsr_read(tmp); } @@ -7810,7 +8553,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm = insn & 0x1f; shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); if (rn == 15) { - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); } else { tmp = load_reg(s, rn); @@ -7837,7 +8580,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (imm != 32) { tmp2 = load_reg(s, rd); gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1); - dead_tmp(tmp2); + tcg_temp_free_i32(tmp2); } break; case 7: @@ -7863,6 +8606,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) else gen_helper_ssat(tmp, tmp, tmp2); } + tcg_temp_free_i32(tmp2); break; } store_reg(s, rd, tmp); @@ -7879,7 +8623,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) tcg_gen_ori_i32(tmp, tmp, imm << 16); } else { /* movw */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, imm); } } else { @@ -7890,7 +8634,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) offset -= imm; else offset += imm; - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, offset); } else { tmp = load_reg(s, rn); @@ -7929,19 +8673,25 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) shifter_out = 1; break; } - gen_op_movl_T1_im(imm); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, imm); rn = (insn >> 16) & 0xf; - if (rn == 15) - gen_op_movl_T0_im(0); - else - gen_movl_T0_reg(s, rn); + if (rn == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } op = (insn >> 21) & 0xf; if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0, - shifter_out)) + shifter_out, tmp, tmp2)) goto illegal_op; + tcg_temp_free_i32(tmp2); rd = (insn >> 8) & 0xf; if (rd != 15) { - gen_movl_reg_T0(s, rd); + store_reg(s, rd, tmp); + } else { + tcg_temp_free_i32(tmp); } } } @@ -7956,9 +8706,45 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) goto illegal_op; break; } + op = ((insn >> 21) & 3) | ((insn >> 22) & 4); + if (rs == 15) { + if (!(insn & (1 << 20))) { + goto illegal_op; + } + if (op != 2) { + /* Byte or halfword load space with dest == r15 : memory hints. + * Catch them early so we don't emit pointless addressing code. + * This space is a mix of: + * PLD/PLDW/PLI, which we implement as NOPs (note that unlike + * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP + * cores) + * unallocated hints, which must be treated as NOPs + * UNPREDICTABLE space, which we NOP or UNDEF depending on + * which is easiest for the decoding logic + * Some space which must UNDEF + */ + int op1 = (insn >> 23) & 3; + int op2 = (insn >> 6) & 0x3f; + if (op & 2) { + goto illegal_op; + } + if (rn == 15) { + /* UNPREDICTABLE or unallocated hint */ + return 0; + } + if (op1 & 1) { + return 0; /* PLD* or unallocated hint */ + } + if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) { + return 0; /* PLD* or unallocated hint */ + } + /* UNDEF space, or an UNPREDICTABLE */ + return 1; + } + } user = IS_USER(s); if (rn == 15) { - addr = new_tmp(); + addr = tcg_temp_new_i32(); /* PC relative. */ /* s->pc has already been incremented by 4. 
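A small sketch of the PC-relative base computed next, under the assumption the comment states (s->pc already points 4 bytes past the instruction): the literal base is that value aligned down to a word boundary, with the imm12 offset then added or subtracted.

#include <stdint.h>

static uint32_t t2_literal_base(uint32_t pc_plus_4)
{
    return pc_plus_4 & 0xfffffffcu;  /* mirrors imm = s->pc & 0xfffffffc */
}

int main(void)
{
    return t2_literal_base(0x8002u + 4u) == 0x8004u ? 0 : 1;
}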
*/ imm = s->pc & 0xfffffffc; @@ -7974,77 +8760,74 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm = insn & 0xfff; tcg_gen_addi_i32(addr, addr, imm); } else { - op = (insn >> 8) & 7; imm = insn & 0xff; - switch (op) { - case 0: case 8: /* Shifted Register. */ + switch ((insn >> 8) & 0xf) { + case 0x0: /* Shifted Register. */ shift = (insn >> 4) & 0xf; - if (shift > 3) + if (shift > 3) { + tcg_temp_free_i32(addr); goto illegal_op; + } tmp = load_reg(s, rm); if (shift) tcg_gen_shli_i32(tmp, tmp, shift); tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); break; - case 4: /* Negative offset. */ + case 0xc: /* Negative offset. */ tcg_gen_addi_i32(addr, addr, -imm); break; - case 6: /* User privilege. */ + case 0xe: /* User privilege. */ tcg_gen_addi_i32(addr, addr, imm); user = 1; break; - case 1: /* Post-decrement. */ + case 0x9: /* Post-decrement. */ imm = -imm; /* Fall through. */ - case 3: /* Post-increment. */ + case 0xb: /* Post-increment. */ postinc = 1; writeback = 1; break; - case 5: /* Pre-decrement. */ + case 0xd: /* Pre-decrement. */ imm = -imm; /* Fall through. */ - case 7: /* Pre-increment. */ + case 0xf: /* Pre-increment. */ tcg_gen_addi_i32(addr, addr, imm); writeback = 1; break; default: + tcg_temp_free_i32(addr); goto illegal_op; } } } - op = ((insn >> 21) & 3) | ((insn >> 22) & 4); if (insn & (1 << 20)) { /* Load. */ - if (rs == 15 && op != 2) { - if (op & 2) - goto illegal_op; - /* Memory hint. Implemented as NOP. */ + switch (op) { + case 0: tmp = gen_ld8u(addr, user); break; + case 4: tmp = gen_ld8s(addr, user); break; + case 1: tmp = gen_ld16u(addr, user); break; + case 5: tmp = gen_ld16s(addr, user); break; + case 2: tmp = gen_ld32(addr, user); break; + default: + tcg_temp_free_i32(addr); + goto illegal_op; + } + if (rs == 15) { + gen_bx(s, tmp); } else { - switch (op) { - case 0: tmp = gen_ld8u(addr, user); break; - case 4: tmp = gen_ld8s(addr, user); break; - case 1: tmp = gen_ld16u(addr, user); break; - case 5: tmp = gen_ld16s(addr, user); break; - case 2: tmp = gen_ld32(addr, user); break; - default: goto illegal_op; - } - if (rs == 15) { - gen_bx(s, tmp); - } else { - store_reg(s, rs, tmp); - } + store_reg(s, rs, tmp); } } else { /* Store. */ - if (rs == 15) - goto illegal_op; tmp = load_reg(s, rs); switch (op) { case 0: gen_st8(tmp, addr, user); break; case 1: gen_st16(tmp, addr, user); break; case 2: gen_st32(tmp, addr, user); break; - default: goto illegal_op; + default: + tcg_temp_free_i32(addr); + goto illegal_op; } } if (postinc) @@ -8052,7 +8835,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (writeback) { store_reg(s, rn, addr); } else { - dead_tmp(addr); + tcg_temp_free_i32(addr); } } break; @@ -8075,9 +8858,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (s->condexec_mask) { cond = s->condexec_cond; - s->condlabel = gen_new_label(); - gen_test_cc(cond ^ 1, s->condlabel); - s->condjmp = 1; + if (cond != 0x0e) { /* Skip conditional when condition is AL. 
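The AL check above avoids emitting a conditional skip for instructions whose IT condition is always true. For orientation, a standalone sketch of how the IT state advances after each instruction in the block, matching the condexec update this translator performs (the mask here is the translator's left-shifted 5-bit form):

static void it_advance(unsigned *cond, unsigned *mask)
{
    *cond = (*cond & 0xe) | ((*mask >> 4) & 1);
    *mask = (*mask << 1) & 0x1f;
}

int main(void)
{
    unsigned cond = 0x0, mask = 0x08;    /* ITT EQ, translator form */
    it_advance(&cond, &mask);            /* second slot: still EQ */
    return (cond == 0x0 && mask == 0x10) ? 0 : 1;
}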
*/ + s->condlabel = gen_new_label(); + gen_test_cc(cond ^ 1, s->condlabel); + s->condjmp = 1; + } } insn = lduw_code(s->pc); @@ -8090,32 +8875,35 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) switch (insn >> 12) { case 0: case 1: + rd = insn & 7; op = (insn >> 11) & 3; if (op == 3) { /* add/subtract */ rn = (insn >> 3) & 7; - gen_movl_T0_reg(s, rn); + tmp = load_reg(s, rn); if (insn & (1 << 10)) { /* immediate */ - gen_op_movl_T1_im((insn >> 6) & 7); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, (insn >> 6) & 7); } else { /* reg */ rm = (insn >> 6) & 7; - gen_movl_T1_reg(s, rm); + tmp2 = load_reg(s, rm); } if (insn & (1 << 9)) { if (s->condexec_mask) - gen_op_subl_T0_T1(); + tcg_gen_sub_i32(tmp, tmp, tmp2); else - gen_op_subl_T0_T1_cc(); + gen_helper_sub_cc(tmp, tmp, tmp2); } else { if (s->condexec_mask) - gen_op_addl_T0_T1(); + tcg_gen_add_i32(tmp, tmp, tmp2); else - gen_op_addl_T0_T1_cc(); + gen_helper_add_cc(tmp, tmp, tmp2); } - gen_movl_reg_T0(s, rd); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); } else { /* shift immediate */ rm = (insn >> 3) & 7; @@ -8131,35 +8919,40 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) /* arithmetic large immediate */ op = (insn >> 11) & 3; rd = (insn >> 8) & 0x7; - if (op == 0) { - gen_op_movl_T0_im(insn & 0xff); - } else { - gen_movl_T0_reg(s, rd); - gen_op_movl_T1_im(insn & 0xff); - } - switch (op) { - case 0: /* mov */ + if (op == 0) { /* mov */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, insn & 0xff); if (!s->condexec_mask) - gen_op_logic_T0_cc(); - break; - case 1: /* cmp */ - gen_op_subl_T0_T1_cc(); - break; - case 2: /* add */ - if (s->condexec_mask) - gen_op_addl_T0_T1(); - else - gen_op_addl_T0_T1_cc(); - break; - case 3: /* sub */ - if (s->condexec_mask) - gen_op_subl_T0_T1(); - else - gen_op_subl_T0_T1_cc(); - break; + gen_logic_CC(tmp); + store_reg(s, rd, tmp); + } else { + tmp = load_reg(s, rd); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, insn & 0xff); + switch (op) { + case 1: /* cmp */ + gen_helper_sub_cc(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; + case 2: /* add */ + if (s->condexec_mask) + tcg_gen_add_i32(tmp, tmp, tmp2); + else + gen_helper_add_cc(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 3: /* sub */ + if (s->condexec_mask) + tcg_gen_sub_i32(tmp, tmp, tmp2); + else + gen_helper_sub_cc(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + } } - if (op != 1) - gen_movl_reg_T0(s, rd); break; case 4: if (insn & (1 << 11)) { @@ -8167,10 +8960,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) /* load pc-relative. Bit 1 of PC is ignored. 
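The condexec_mask tests in the cases above choose between the flag-setting and non-flag-setting forms, since data-processing instructions inside an IT block must not touch the flags. A standalone sketch of what the flag-setting add computes, assuming gen_helper_add_cc has the usual ARM NZCV semantics (the struct and names are illustrative):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t res; int n, z, c, v; } addcc;

static addcc add_cc(uint32_t a, uint32_t b)
{
    addcc r;
    r.res = a + b;
    r.n = (r.res >> 31) & 1;
    r.z = (r.res == 0);
    r.c = (r.res < a);                            /* unsigned carry-out */
    r.v = (int)((~(a ^ b) & (a ^ r.res)) >> 31);  /* signed overflow */
    return r;
}

int main(void)
{
    addcc r = add_cc(0xffffffffu, 1);             /* expect res=0, Z=1, C=1 */
    printf("%08x n=%d z=%d c=%d v=%d\n", r.res, r.n, r.z, r.c, r.v);
    return 0;
}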
*/ val = s->pc + 2 + ((insn & 0xff) * 4); val &= ~(uint32_t)2; - addr = new_tmp(); + addr = tcg_temp_new_i32(); tcg_gen_movi_i32(addr, val); tmp = gen_ld32(addr, IS_USER(s)); - dead_tmp(addr); + tcg_temp_free_i32(addr); store_reg(s, rd, tmp); break; } @@ -8181,28 +8974,33 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) op = (insn >> 8) & 3; switch (op) { case 0: /* add */ - gen_movl_T0_reg(s, rd); - gen_movl_T1_reg(s, rm); - gen_op_addl_T0_T1(); - gen_movl_reg_T0(s, rd); + tmp = load_reg(s, rd); + tmp2 = load_reg(s, rm); + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); break; case 1: /* cmp */ - gen_movl_T0_reg(s, rd); - gen_movl_T1_reg(s, rm); - gen_op_subl_T0_T1_cc(); + tmp = load_reg(s, rd); + tmp2 = load_reg(s, rm); + gen_helper_sub_cc(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); break; case 2: /* mov/cpy */ - gen_movl_T0_reg(s, rm); - gen_movl_reg_T0(s, rd); + tmp = load_reg(s, rm); + store_reg(s, rd, tmp); break; case 3:/* branch [and link] exchange thumb register */ tmp = load_reg(s, rm); if (insn & (1 << 7)) { + ARCH(5); val = (uint32_t)s->pc | 1; - tmp2 = new_tmp(); + tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, val); store_reg(s, 14, tmp2); } + /* already thumb, no need to check */ gen_bx(s, tmp); break; } @@ -8223,114 +9021,126 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) val = 0; } - if (op == 9) /* neg */ - gen_op_movl_T0_im(0); - else if (op != 0xf) /* mvn doesn't read its first operand */ - gen_movl_T0_reg(s, rd); + if (op == 9) { /* neg */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else if (op != 0xf) { /* mvn doesn't read its first operand */ + tmp = load_reg(s, rd); + } else { + TCGV_UNUSED(tmp); + } - gen_movl_T1_reg(s, rm); + tmp2 = load_reg(s, rm); switch (op) { case 0x0: /* and */ - gen_op_andl_T0_T1(); + tcg_gen_and_i32(tmp, tmp, tmp2); if (!s->condexec_mask) - gen_op_logic_T0_cc(); + gen_logic_CC(tmp); break; case 0x1: /* eor */ - gen_op_xorl_T0_T1(); + tcg_gen_xor_i32(tmp, tmp, tmp2); if (!s->condexec_mask) - gen_op_logic_T0_cc(); + gen_logic_CC(tmp); break; case 0x2: /* lsl */ if (s->condexec_mask) { - gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]); + gen_helper_shl(tmp2, tmp2, tmp); } else { - gen_helper_shl_cc(cpu_T[1], cpu_T[1], cpu_T[0]); - gen_op_logic_T1_cc(); + gen_helper_shl_cc(tmp2, tmp2, tmp); + gen_logic_CC(tmp2); } break; case 0x3: /* lsr */ if (s->condexec_mask) { - gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]); + gen_helper_shr(tmp2, tmp2, tmp); } else { - gen_helper_shr_cc(cpu_T[1], cpu_T[1], cpu_T[0]); - gen_op_logic_T1_cc(); + gen_helper_shr_cc(tmp2, tmp2, tmp); + gen_logic_CC(tmp2); } break; case 0x4: /* asr */ if (s->condexec_mask) { - gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]); + gen_helper_sar(tmp2, tmp2, tmp); } else { - gen_helper_sar_cc(cpu_T[1], cpu_T[1], cpu_T[0]); - gen_op_logic_T1_cc(); + gen_helper_sar_cc(tmp2, tmp2, tmp); + gen_logic_CC(tmp2); } break; case 0x5: /* adc */ if (s->condexec_mask) - gen_adc_T0_T1(); + gen_adc(tmp, tmp2); else - gen_op_adcl_T0_T1_cc(); + gen_helper_adc_cc(tmp, tmp, tmp2); break; case 0x6: /* sbc */ if (s->condexec_mask) - gen_sbc_T0_T1(); + gen_sub_carry(tmp, tmp, tmp2); else - gen_op_sbcl_T0_T1_cc(); + gen_helper_sbc_cc(tmp, tmp, tmp2); break; case 0x7: /* ror */ if (s->condexec_mask) { - gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]); + tcg_gen_andi_i32(tmp, tmp, 0x1f); + tcg_gen_rotr_i32(tmp2, tmp2, tmp); } else { - gen_helper_ror_cc(cpu_T[1], cpu_T[1], cpu_T[0]); - gen_op_logic_T1_cc(); 
+ gen_helper_ror_cc(tmp2, tmp2, tmp); + gen_logic_CC(tmp2); } break; case 0x8: /* tst */ - gen_op_andl_T0_T1(); - gen_op_logic_T0_cc(); + tcg_gen_and_i32(tmp, tmp, tmp2); + gen_logic_CC(tmp); rd = 16; break; case 0x9: /* neg */ if (s->condexec_mask) - tcg_gen_neg_i32(cpu_T[0], cpu_T[1]); + tcg_gen_neg_i32(tmp, tmp2); else - gen_op_subl_T0_T1_cc(); + gen_helper_sub_cc(tmp, tmp, tmp2); break; case 0xa: /* cmp */ - gen_op_subl_T0_T1_cc(); + gen_helper_sub_cc(tmp, tmp, tmp2); rd = 16; break; case 0xb: /* cmn */ - gen_op_addl_T0_T1_cc(); + gen_helper_add_cc(tmp, tmp, tmp2); rd = 16; break; case 0xc: /* orr */ - gen_op_orl_T0_T1(); + tcg_gen_or_i32(tmp, tmp, tmp2); if (!s->condexec_mask) - gen_op_logic_T0_cc(); + gen_logic_CC(tmp); break; case 0xd: /* mul */ - gen_op_mull_T0_T1(); + tcg_gen_mul_i32(tmp, tmp, tmp2); if (!s->condexec_mask) - gen_op_logic_T0_cc(); + gen_logic_CC(tmp); break; case 0xe: /* bic */ - gen_op_bicl_T0_T1(); + tcg_gen_andc_i32(tmp, tmp, tmp2); if (!s->condexec_mask) - gen_op_logic_T0_cc(); + gen_logic_CC(tmp); break; case 0xf: /* mvn */ - gen_op_notl_T1(); + tcg_gen_not_i32(tmp2, tmp2); if (!s->condexec_mask) - gen_op_logic_T1_cc(); + gen_logic_CC(tmp2); val = 1; rm = rd; break; } if (rd != 16) { - if (val) - gen_movl_reg_T1(s, rm); - else - gen_movl_reg_T0(s, rd); + if (val) { + store_reg(s, rm, tmp2); + if (op != 0xf) + tcg_temp_free_i32(tmp); + } else { + store_reg(s, rd, tmp); + tcg_temp_free_i32(tmp2); + } + } else { + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); } break; @@ -8343,7 +9153,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) addr = load_reg(s, rn); tmp = load_reg(s, rm); tcg_gen_add_i32(addr, addr, tmp); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); if (op < 3) /* store */ tmp = load_reg(s, rd); @@ -8376,7 +9186,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) } if (op >= 3) /* load */ store_reg(s, rd, tmp); - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 6: @@ -8396,7 +9206,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 7: @@ -8416,7 +9226,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st8(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 8: @@ -8436,7 +9246,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st16(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 9: @@ -8455,7 +9265,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, rd); gen_st32(tmp, addr, IS_USER(s)); } - dead_tmp(addr); + tcg_temp_free_i32(addr); break; case 10: @@ -8466,7 +9276,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, 13); } else { /* PC. bit 1 is ignored. */ - tmp = new_tmp(); + tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2); } val = (insn & 0xff) * 4; @@ -8515,6 +9325,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if ((insn & (1 << 11)) == 0) { tcg_gen_addi_i32(addr, addr, -offset); } + tmp2 = tcg_const_i32(4); for (i = 0; i < 8; i++) { if (insn & (1 << i)) { if (insn & (1 << 11)) { @@ -8527,7 +9338,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) gen_st32(tmp, addr, IS_USER(s)); } /* advance to the next address. 
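Two notes on the PUSH/POP hunk here: the word stride is hoisted into a single constant temp (tcg_const_i32(4)) reused with tcg_gen_add_i32 rather than emitting tcg_gen_addi_i32 per register, presumably to share one constant across the loop; and the start address is SP minus one word per register stored. A rough standalone sketch of that address arithmetic (names illustrative):

#include <stdint.h>
#include <stdio.h>

static int count_regs(unsigned mask)
{
    int n = 0;
    while (mask) { n += mask & 1; mask >>= 1; }
    return n;
}

static uint32_t push_start_addr(uint32_t sp, unsigned reg_mask, int push_lr)
{
    return sp - 4u * (uint32_t)(count_regs(reg_mask & 0xff) + (push_lr ? 1 : 0));
}

int main(void)
{
    printf("%08x\n", push_start_addr(0x20001000u, 0x0f, 1)); /* 5 words down */
    return 0;
}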
*/ - tcg_gen_addi_i32(addr, addr, 4); + tcg_gen_add_i32(addr, addr, tmp2); } } TCGV_UNUSED(tmp); @@ -8542,16 +9353,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tmp = load_reg(s, 14); gen_st32(tmp, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, 4); + tcg_gen_add_i32(addr, addr, tmp2); } + tcg_temp_free_i32(tmp2); if ((insn & (1 << 11)) == 0) { tcg_gen_addi_i32(addr, addr, -offset); } /* write back the new stack pointer */ store_reg(s, 13, addr); /* set the new PC value */ - if ((insn & 0x0900) == 0x0900) - gen_bx(s, tmp); + if ((insn & 0x0900) == 0x0900) { + store_reg_from_load(env, s, 15, tmp); + } break; case 1: case 3: case 9: case 11: /* czb */ @@ -8563,7 +9376,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel); else tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel); - dead_tmp(tmp); + tcg_temp_free_i32(tmp); offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3; val = (uint32_t)s->pc + 2; val += offset; @@ -8582,10 +9395,8 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 0xe: /* bkpt */ - gen_set_condexec(s); - gen_set_pc_im(s->pc - 2); - gen_exception(EXCP_BKPT); - s->is_jmp = DISAS_JUMP; + ARCH(5); + gen_exception_insn(s, 2, EXCP_BKPT); break; case 0xa: /* rev */ @@ -8612,22 +9423,22 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (insn & 1) { addr = tcg_const_i32(16); gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); } /* FAULTMASK */ if (insn & 2) { addr = tcg_const_i32(17); gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); } + tcg_temp_free_i32(tmp); gen_lookup_tb(s); } else { if (insn & (1 << 4)) shift = CPSR_A | CPSR_I | CPSR_F; else shift = 0; - - val = ((insn & 7) << 6) & shift; - gen_op_movl_T0_im(val); - gen_set_psr_T0(s, shift, 0); + gen_set_psr_im(s, ((insn & 7) << 6), 0, shift); } break; @@ -8637,7 +9448,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 12: + { /* load/store multiple */ + TCGv loaded_var; + TCGV_UNUSED(loaded_var); rn = (insn >> 8) & 0x7; addr = load_reg(s, rn); for (i = 0; i < 8; i++) { @@ -8645,7 +9459,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (insn & (1 << 11)) { /* load */ tmp = gen_ld32(addr, IS_USER(s)); - store_reg(s, i, tmp); + if (i == rn) { + loaded_var = tmp; + } else { + store_reg(s, i, tmp); + } } else { /* store */ tmp = load_reg(s, i); @@ -8655,14 +9473,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) tcg_gen_addi_i32(addr, addr, 4); } } - /* Base register writeback. 
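The hunk that follows restructures this into the full rule: write the updated address back only when the base register is absent from the list, and when the base is in the list on a load, let the loaded value win (deferred via loaded_var so it is stored after the loop). A standalone sketch of that rule (names illustrative):

#include <stdint.h>

static uint32_t ldm_final_base(unsigned rn, unsigned list, int is_load,
                               uint32_t old_base, uint32_t end_addr,
                               uint32_t loaded_val)
{
    if (!(list & (1u << rn))) {
        return end_addr;                 /* base not in list: writeback */
    }
    return is_load ? loaded_val : old_base;
}

int main(void)
{
    /* base r0 in the list on a load: the loaded value takes priority */
    return ldm_final_base(0, 0x01, 1, 0x100u, 0x104u, 0xabcdu) == 0xabcdu ? 0 : 1;
}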
*/ if ((insn & (1 << rn)) == 0) { + /* base reg not in list: base register writeback */ store_reg(s, rn, addr); } else { - dead_tmp(addr); + /* base reg in list: if load, complete it now */ + if (insn & (1 << 11)) { + store_reg(s, rn, loaded_var); + } + tcg_temp_free_i32(addr); } break; - + } case 13: /* conditional branch or swi */ cond = (insn >> 8) & 0xf; @@ -8671,7 +9493,6 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) if (cond == 0xf) { /* swi */ - gen_set_condexec(s); gen_set_pc_im(s->pc); s->is_jmp = DISAS_SWI; break; @@ -8680,7 +9501,6 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) s->condlabel = gen_new_label(); gen_test_cc(cond ^ 1, s->condlabel); s->condjmp = 1; - gen_movl_T1_reg(s, 15); /* jump to the offset */ val = (uint32_t)s->pc + 2; @@ -8709,17 +9529,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) } return; undef32: - gen_set_condexec(s); - gen_set_pc_im(s->pc - 4); - gen_exception(EXCP_UDEF); - s->is_jmp = DISAS_JUMP; + gen_exception_insn(s, 4, EXCP_UDEF); return; illegal_op: undef: - gen_set_condexec(s); - gen_set_pc_im(s->pc - 2); - gen_exception(EXCP_UDEF); - s->is_jmp = DISAS_JUMP; + gen_exception_insn(s, 2, EXCP_UDEF); } /* generate intermediate code in gen_opc_buf and gen_opparam_buf for @@ -8739,9 +9553,6 @@ static inline void gen_intermediate_code_internal(CPUState *env, int max_insns; /* generate intermediate code */ - num_temps = 0; - memset(temps, 0, sizeof(temps)); - pc_start = tb->pc; dc->tb = tb; @@ -8752,18 +9563,16 @@ static inline void gen_intermediate_code_internal(CPUState *env, dc->pc = pc_start; dc->singlestep_enabled = env->singlestep_enabled; dc->condjmp = 0; - dc->thumb = env->thumb; - dc->condexec_mask = (env->condexec_bits & 0xf) << 1; - dc->condexec_mask_prev = dc->condexec_mask; - dc->condexec_cond = env->condexec_bits >> 4; + dc->thumb = ARM_TBFLAG_THUMB(tb->flags); + dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1; + dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4; #if !defined(CONFIG_USER_ONLY) - if (IS_M(env)) { - dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1)); - } else { - dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR; - } + dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0); #endif ANDROID_START_CODEGEN(search_pc); + dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags); + dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags); + dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags); cpu_F0s = tcg_temp_new_i32(); cpu_F1s = tcg_temp_new_i32(); cpu_F0d = tcg_temp_new_i64(); @@ -8782,6 +9591,47 @@ static inline void gen_intermediate_code_internal(CPUState *env, gen_icount_start(); ANDROID_TRACE_START_BB(); + tcg_clear_temp_count(); + + /* A note on handling of the condexec (IT) bits: + * + * We want to avoid the overhead of having to write the updated condexec + * bits back to the CPUState for every instruction in an IT block. So: + * (1) if the condexec bits are not already zero then we write + * zero back into the CPUState now. This avoids complications trying + * to do it at the end of the block. (For example if we don't do this + * it's hard to identify whether we can safely skip writing condexec + * at the end of the TB, which we definitely want to do for the case + * where a TB doesn't do anything with the IT state at all.) + * (2) if we are going to leave the TB then we call gen_set_condexec() + * which will write the correct value into CPUState if zero is wrong. 
+ * This is done both for leaving the TB at the end, and for leaving + * it because of an exception we know will happen, which is done in + * gen_exception_insn(). The latter is necessary because we need to + * leave the TB with the PC/IT state just prior to execution of the + * instruction which caused the exception. + * (3) if we leave the TB unexpectedly (eg a data abort on a load) + * then the CPUState will be wrong and we need to reset it. + * This is handled in the same way as restoration of the + * PC in these situations: we will be called again with search_pc=1 + * and generate a mapping of the condexec bits for each PC in + * gen_opc_condexec_bits[]. restore_state_to_opc() then uses + * this to restore the condexec bits. + * + * Note that there are no instructions which can read the condexec + * bits, and none which can write non-static values to them, so + * we don't need to care about whether CPUState is correct in the + * middle of a TB. + */ + + /* Reset the conditional execution bits immediately. This avoids + complications trying to do it at the end of the block. */ + if (dc->condexec_mask || dc->condexec_cond) + { + TCGv tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + store_cpu_field(tmp, condexec_bits); + } do { #ifdef CONFIG_USER_ONLY /* Intercept jump to the magic kernel page. */ @@ -8805,10 +9655,7 @@ static inline void gen_intermediate_code_internal(CPUState *env, if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { QTAILQ_FOREACH(bp, &env->breakpoints, entry) { if (bp->pc == dc->pc) { - gen_set_condexec(dc); - gen_set_pc_im(dc->pc); - gen_exception(EXCP_DEBUG); - dc->is_jmp = DISAS_JUMP; + gen_exception_insn(dc, 0, EXCP_DEBUG); /* Advance PC so that clearing the breakpoint will invalidate this TB. */ dc->pc += 2; @@ -8826,6 +9673,7 @@ static inline void gen_intermediate_code_internal(CPUState *env, gen_opc_instr_start[lj++] = 0; } gen_opc_pc[lj] = dc->pc; + gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); gen_opc_instr_start[lj] = 1; gen_opc_icount[lj] = num_insns; } @@ -8833,9 +9681,12 @@ static inline void gen_intermediate_code_internal(CPUState *env, if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) gen_io_start(); - if (env->thumb) { + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { + tcg_gen_debug_insn_start(dc->pc); + } + + if (dc->thumb) { disas_thumb_insn(env, dc); - dc->condexec_mask_prev = dc->condexec_mask; if (dc->condexec_mask) { dc->condexec_cond = (dc->condexec_cond & 0xe) | ((dc->condexec_mask >> 4) & 1); @@ -8847,16 +9698,16 @@ static inline void gen_intermediate_code_internal(CPUState *env, } else { disas_arm_insn(env, dc); } - if (num_temps) { - fprintf(stderr, "Internal resource leak before %08x (%d temps)\n", dc->pc, num_temps); - tcg_dump_ops(&tcg_ctx, stderr); - num_temps = 0; - } if (dc->condjmp && !dc->is_jmp) { gen_set_label(dc->condlabel); dc->condjmp = 0; } + + if (tcg_check_temp_count()) { + fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc); + } + /* Translation stops when a conditional branch is encountered. * Otherwise the subsequent code could get translated several times. * Also stop translation when a page boundary is reached. 
This @@ -8888,6 +9739,8 @@ static inline void gen_intermediate_code_internal(CPUState *env, gen_set_condexec(dc); if (dc->is_jmp == DISAS_SWI) { gen_exception(EXCP_SWI); + } else if (dc->is_jmp == DISAS_SMC) { + gen_exception(EXCP_SMC); } else { gen_exception(EXCP_DEBUG); } @@ -8900,6 +9753,8 @@ static inline void gen_intermediate_code_internal(CPUState *env, gen_set_condexec(dc); if (dc->is_jmp == DISAS_SWI && !dc->condjmp) { gen_exception(EXCP_SWI); + } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) { + gen_exception(EXCP_SMC); } else { /* FIXME: Single stepping a WFI insn will not halt the CPU. */ @@ -8934,6 +9789,9 @@ static inline void gen_intermediate_code_internal(CPUState *env, case DISAS_SWI: gen_exception(EXCP_SWI); break; + case DISAS_SMC: + gen_exception(EXCP_SMC); + break; } if (dc->condjmp) { gen_set_label(dc->condlabel); @@ -8951,7 +9809,7 @@ done_generating: if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); - log_target_disas(pc_start, dc->pc - pc_start, env->thumb); + log_target_disas(pc_start, dc->pc - pc_start, dc->thumb); qemu_log("\n"); } #endif @@ -8982,8 +9840,7 @@ static const char *cpu_mode_names[16] = { "???", "???", "???", "und", "???", "???", "???", "sys" }; -void cpu_dump_state(CPUState *env, FILE *f, - int (*cpu_fprintf)(FILE *f, const char *fmt, ...), +void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf, int flags) { int i; @@ -9038,4 +9895,5 @@ void cpu_dump_state(CPUState *env, FILE *f, void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos) { env->regs[15] = gen_opc_pc[pc_pos]; + env->condexec_bits = gen_opc_condexec_bits[pc_pos]; } |
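To close, a standalone sketch of the condexec packing this patch threads through translation: the CPU-visible form keeps cond[3:0] in the high nibble and the IT mask in the low nibble, while the translator carries the mask pre-shifted left by one; gen_opc_condexec_bits[] records the packed form per opcode so restore_state_to_opc() can reinstate it after an unexpected TB exit, exactly as the final hunk does for env->condexec_bits.

#include <stdint.h>
#include <assert.h>

static uint32_t pack_condexec(unsigned cond, unsigned mask_shifted)
{
    return (cond << 4) | (mask_shifted >> 1);   /* as stored per opcode */
}

static void unpack_condexec(uint32_t bits, unsigned *cond, unsigned *mask_shifted)
{
    *cond = bits >> 4;                          /* as read back on restore */
    *mask_shifted = (bits & 0xf) << 1;
}

int main(void)
{
    unsigned c, m;
    unpack_condexec(pack_condexec(0x8, 0x10), &c, &m);
    assert(c == 0x8 && m == 0x10);              /* lossless round trip */
    return 0;
}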