author     David 'Digit' Turner <digit@android.com>    2011-06-03 13:41:05 +0200
committer  David 'Digit' Turner <digit@android.com>    2011-06-08 15:10:43 +0200
commit     5285864985be9077e58e42235af6582dee72e841 (patch)
tree       1020a7d95bec028cdba66816c2b4dc6c0bd79073 /target-arm
parent     945e4f4f8554b7b2f30b95d3560465c93975a8a9 (diff)
target-arm: integrate upstream ARM translator.
The new translator has the following benefits:

- faster emulation of ARMv5TE code (through an improved JIT)
- proper support for ARMv7 and NEON
- rebuilding the full-eng platform images for ARMv7-A results in
  additional speed increases (a.k.a. Thumb-2 rocks!)

Note that, as an interesting side effect, NEON machine code is generally
slower than the equivalent C code it is supposed to replace when run
inside the emulator. This is because, for now, the translator simply
turns each NEON instruction into a series of sequential host
instructions (and also incurs overhead for packing/unpacking/saturation,
etc.).

This change has been tested by running the "full-eng" platform image
rebuilt for ARMv7-A and NEON, with an appropriate kernel image
(prebuilt/android-arm/kernel/kernel-qemu-armv7). The system boots and
seems to work perfectly; not a single issue was observed during testing.
On a 2.4 GHz Xeon CPU, the image boots in about 25 seconds (compared to
40 seconds for a vanilla build without this emulator patch).

Thanks to Peter Maydell at Linaro and ARM for his hard work to make
this happen (first upstream, and now on Android).

This integration is based on the Meego git repository
(git://gitorious.org/qemu-maemo/qemu.git) at the following hash:

    7e2d65b0c95c865b1fa6d3d4948e8e822b9ac2fd

on top of which the following upstream patch has been applied (as
recommended by Peter):

    b7fa9214d8d4f57992c9acd0ccb125c54a095f00

(We chose this repository because it was the closest to the previous
integration. We will probably use the Linaro repositories for future
work on this part of the emulator.)

Change-Id: I54837e3d2e908b2380d158411d7a9813630e7e4e
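As a rough illustration of the NEON slowdown mentioned above, the sketch
below (hypothetical C, not code from this patch or from neon_helper.c)
shows how a single saturating NEON add ends up being emulated lane by
lane on the host, with explicit unpack/saturate/repack work per element:

    #include <stdint.h>

    /* Hypothetical sketch (not from this patch): emulating a NEON
     * VQADD.U8 on one 64-bit register by unpacking, saturating and
     * repacking each 8-bit lane with scalar host operations. */
    static uint64_t emu_neon_qadd_u8(uint64_t a, uint64_t b)
    {
        uint64_t result = 0;
        int lane;
        for (lane = 0; lane < 8; lane++) {
            uint32_t x = (uint32_t)(a >> (lane * 8)) & 0xff;
            uint32_t y = (uint32_t)(b >> (lane * 8)) & 0xff;
            uint32_t sum = x + y;           /* scalar add */
            if (sum > 0xff) {
                sum = 0xff;                 /* unsigned saturation */
            }
            result |= (uint64_t)sum << (lane * 8);
        }
        return result;
    }

A native VQADD.U8 handles all eight lanes in one instruction, so the
emulated version trades a single guest instruction for dozens of host
operations, which is why emulated NEON can lose to plain C.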
Diffstat (limited to 'target-arm')
-rw-r--r--   target-arm/cpu.h                                          |  149
-rw-r--r--   target-arm/exec.h                                         |   12
-rw-r--r--   target-arm/helper.c                                       | 1202
-rw-r--r--   target-arm/helper.h (renamed from target-arm/helpers.h)   |  315
-rw-r--r--   target-arm/iwmmxt_helper.c                                |   85
-rw-r--r--   target-arm/machine.c                                      |   40
-rw-r--r--   target-arm/neon_helper.c                                  |  907
-rw-r--r--   target-arm/op_addsub.h                                    |    8
-rw-r--r--   target-arm/op_helper.c                                    |  314
-rw-r--r--   target-arm/translate.c                                    | 5102
10 files changed, 5106 insertions(+), 3028 deletions(-)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index f16e391..b842c52 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -43,6 +43,7 @@
#define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */
#define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */
#define EXCP_STREX 10
+#define EXCP_SMC 11 /* secure monitor call */
#define ARMV7M_EXCP_RESET 1
#define ARMV7M_EXCP_NMI 2
@@ -55,10 +56,16 @@
#define ARMV7M_EXCP_PENDSV 14
#define ARMV7M_EXCP_SYSTICK 15
+/* ARM-specific interrupt pending bits. */
+#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1
+
+
typedef void ARMWriteCPFunc(void *opaque, int cp_info,
- int srcreg, int operand, uint32_t value);
+ int srcreg, int operand, uint32_t value,
+ void *retaddr);
typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
- int dstreg, int operand);
+ int dstreg, int operand,
+ void *retaddr);
struct arm_boot_info;
@@ -82,9 +89,9 @@ typedef struct CPUARMState {
uint32_t spsr;
/* Banked registers. */
- uint32_t banked_spsr[6];
- uint32_t banked_r13[6];
- uint32_t banked_r14[6];
+ uint32_t banked_spsr[7];
+ uint32_t banked_r13[7];
+ uint32_t banked_r14[7];
/* These hold r8-r12. */
uint32_t usr_regs[5];
@@ -112,6 +119,9 @@ typedef struct CPUARMState {
uint32_t c1_sys; /* System control register. */
uint32_t c1_coproc; /* Coprocessor access register. */
uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */
+ uint32_t c1_secfg; /* Secure configuration register. */
+ uint32_t c1_sedbg; /* Secure debug enable register. */
+ uint32_t c1_nseac; /* Non-secure access control register. */
uint32_t c2_base0; /* MMU translation table base 0. */
uint32_t c2_base1; /* MMU translation table base 1. */
uint32_t c2_control; /* MMU translation table base control. */
@@ -126,8 +136,14 @@ typedef struct CPUARMState {
uint32_t c6_region[8]; /* MPU base/size registers. */
uint32_t c6_insn; /* Fault address registers. */
uint32_t c6_data;
+ uint32_t c7_par; /* Translation result. */
uint32_t c9_insn; /* Cache lockdown registers. */
uint32_t c9_data;
+ uint32_t c9_pmcr_data; /* Performance Monitor Control Register */
+ uint32_t c9_useren; /* user enable register */
+ uint32_t c9_inten; /* interrupt enable set/clear register */
+ uint32_t c12_vbar; /* secure/nonsecure vector base address register. */
+ uint32_t c12_mvbar; /* monitor vector base address register. */
uint32_t c13_fcse; /* FCSE PID. */
uint32_t c13_context; /* Context ID. */
uint32_t c13_tls1; /* User RW Thread register. */
@@ -148,15 +164,10 @@ typedef struct CPUARMState {
int current_sp;
int exception;
int pending_exception;
- void *nvic;
} v7m;
- /* Coprocessor IO used by peripherals */
- struct {
- ARMReadCPFunc *cp_read;
- ARMWriteCPFunc *cp_write;
- void *opaque;
- } cp[15];
+ /* Minimal set of debug coprocessor state (cp14) */
+ uint32_t cp14_dbgdidr;
/* Thumb-2 EE state. */
uint32_t teecr;
@@ -165,10 +176,6 @@ typedef struct CPUARMState {
/* Internal CPU feature flags. */
uint32_t features;
- /* Callback for vectored interrupt controller. */
- int (*get_irq_vector)(struct CPUARMState *);
- void *irq_opaque;
-
/* VFP coprocessor state. */
struct {
float64 regs[32];
@@ -181,12 +188,27 @@ typedef struct CPUARMState {
/* scratch space when Tn are not sufficient. */
uint32_t scratch[8];
+ /* fp_status is the "normal" fp status. standard_fp_status retains
+ * values corresponding to the ARM "Standard FPSCR Value", ie
+ * default-NaN, flush-to-zero, round-to-nearest and is used by
+ * any operations (generally Neon) which the architecture defines
+ * as controlled by the standard FPSCR value rather than the FPSCR.
+ *
+ * To avoid having to transfer exception bits around, we simply
+ * say that the FPSCR cumulative exception flags are the logical
+ * OR of the flags in the two fp statuses. This relies on the
+ * only thing which needs to read the exception flags being
+ * an explicit FPSCR read.
+ */
float_status fp_status;
+ float_status standard_fp_status;
} vfp;
+ uint32_t exclusive_addr;
+ uint32_t exclusive_val;
+ uint32_t exclusive_high;
#if defined(CONFIG_USER_ONLY)
- struct mmon_state *mmon_entry;
-#else
- uint32_t mmon_addr;
+ uint32_t exclusive_test;
+ uint32_t exclusive_info;
#endif
/* iwMMXt coprocessor state. */
@@ -205,6 +227,14 @@ typedef struct CPUARMState {
CPU_COMMON
/* These fields after the common ones so they are preserved on reset. */
+
+ /* Coprocessor IO used by peripherals */
+ struct {
+ ARMReadCPFunc *cp_read;
+ ARMWriteCPFunc *cp_write;
+ void *opaque;
+ } cp[15];
+ void *nvic;
struct arm_boot_info *boot_info;
} CPUARMState;
@@ -223,9 +253,8 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo,
void *puc);
int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
int mmu_idx, int is_softmuu);
+#define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault
-void cpu_lock(void);
-void cpu_unlock(void);
static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
{
env->cp15.c13_tls2 = newtls;
@@ -299,11 +328,16 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
}
}
+/* Return the current FPSCR value. */
+uint32_t vfp_get_fpscr(CPUARMState *env);
+void vfp_set_fpscr(CPUARMState *env, uint32_t val);
+
enum arm_cpu_mode {
ARM_CPU_MODE_USR = 0x10,
ARM_CPU_MODE_FIQ = 0x11,
ARM_CPU_MODE_IRQ = 0x12,
ARM_CPU_MODE_SVC = 0x13,
+ ARM_CPU_MODE_SMC = 0x16,
ARM_CPU_MODE_ABT = 0x17,
ARM_CPU_MODE_UND = 0x1b,
ARM_CPU_MODE_SYS = 0x1f
@@ -339,11 +373,17 @@ enum arm_features {
ARM_FEATURE_THUMB2,
ARM_FEATURE_MPU, /* Only has Memory Protection Unit, not full MMU. */
ARM_FEATURE_VFP3,
+ ARM_FEATURE_VFP_FP16,
ARM_FEATURE_NEON,
ARM_FEATURE_DIV,
ARM_FEATURE_M, /* Microcontroller profile. */
ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */
- ARM_FEATURE_THUMB2EE
+ ARM_FEATURE_THUMB2EE,
+ ARM_FEATURE_V7MP, /* v7 Multiprocessing Extensions */
+ ARM_FEATURE_V4T,
+ ARM_FEATURE_V5,
+ ARM_FEATURE_STRONGARM,
+ ARM_FEATURE_TRUSTZONE, /* TrustZone Security Extensions. */
};
static inline int arm_feature(CPUARMState *env, int feature)
@@ -351,7 +391,7 @@ static inline int arm_feature(CPUARMState *env, int feature)
return (env->features & (1u << feature)) != 0;
}
-void arm_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
/* Interface between CPU and Interrupt controller. */
void armv7m_nvic_set_pending(void *opaque, int irq);
@@ -374,6 +414,8 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
#define ARM_CPUID_ARM946 0x41059461
#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
+#define ARM_CPUID_SA1100 0x4401A11B
+#define ARM_CPUID_SA1110 0x6901B119
#define ARM_CPUID_PXA250 0x69052100
#define ARM_CPUID_PXA255 0x69052d00
#define ARM_CPUID_PXA260 0x69052903
@@ -390,6 +432,7 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
#define ARM_CPUID_ARM1136_R2 0x4107b362
#define ARM_CPUID_ARM11MPCORE 0x410fb022
#define ARM_CPUID_CORTEXA8 0x410fc080
+#define ARM_CPUID_CORTEXA8_R2 0x412fc083
#define ARM_CPUID_CORTEXA9 0x410fc090
#define ARM_CPUID_CORTEXM3 0x410fc231
#define ARM_CPUID_ANY 0xffffffff
@@ -403,13 +446,16 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
#define TARGET_PAGE_BITS 10
#endif
+#define TARGET_PHYS_ADDR_SPACE_BITS 32
+#define TARGET_VIRT_ADDR_SPACE_BITS 32
+
#define cpu_init cpu_arm_init
#define cpu_exec cpu_arm_exec
#define cpu_gen_code cpu_arm_gen_code
#define cpu_signal_handler cpu_arm_signal_handler
#define cpu_list arm_cpu_list
-#define CPU_SAVE_VERSION 1
+#define CPU_SAVE_VERSION 3
/* MMU modes definitions */
#define MMU_MODE0_SUFFIX _kernel
@@ -439,24 +485,57 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
#endif
#include "cpu-all.h"
-#include "exec-all.h"
-static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
-{
- env->regs[15] = tb->pc;
-}
+/* Bit usage in the TB flags field: */
+#define ARM_TBFLAG_THUMB_SHIFT 0
+#define ARM_TBFLAG_THUMB_MASK (1 << ARM_TBFLAG_THUMB_SHIFT)
+#define ARM_TBFLAG_VECLEN_SHIFT 1
+#define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
+#define ARM_TBFLAG_VECSTRIDE_SHIFT 4
+#define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
+#define ARM_TBFLAG_PRIV_SHIFT 6
+#define ARM_TBFLAG_PRIV_MASK (1 << ARM_TBFLAG_PRIV_SHIFT)
+#define ARM_TBFLAG_VFPEN_SHIFT 7
+#define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT)
+#define ARM_TBFLAG_CONDEXEC_SHIFT 8
+#define ARM_TBFLAG_CONDEXEC_MASK (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
+/* Bits 31..16 are currently unused. */
+
+/* some convenience accessor macros */
+#define ARM_TBFLAG_THUMB(F) \
+ (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
+#define ARM_TBFLAG_VECLEN(F) \
+ (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT)
+#define ARM_TBFLAG_VECSTRIDE(F) \
+ (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT)
+#define ARM_TBFLAG_PRIV(F) \
+ (((F) & ARM_TBFLAG_PRIV_MASK) >> ARM_TBFLAG_PRIV_SHIFT)
+#define ARM_TBFLAG_VFPEN(F) \
+ (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT)
+#define ARM_TBFLAG_CONDEXEC(F) \
+ (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
target_ulong *cs_base, int *flags)
{
+ int privmode;
*pc = env->regs[15];
*cs_base = 0;
- *flags = env->thumb | (env->vfp.vec_len << 1)
- | (env->vfp.vec_stride << 4) | (env->condexec_bits << 8);
- if ((env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR)
- *flags |= (1 << 6);
- if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30))
- *flags |= (1 << 7);
+ *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
+ | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
+ | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
+ | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT);
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ privmode = !((env->v7m.exception == 0) && (env->v7m.control & 1));
+ } else {
+ privmode = (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR;
+ }
+ if (privmode) {
+ *flags |= ARM_TBFLAG_PRIV_MASK;
+ }
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+ *flags |= ARM_TBFLAG_VFPEN_MASK;
+ }
}
#endif
diff --git a/target-arm/exec.h b/target-arm/exec.h
index 710a2f9..07bfd57 100644
--- a/target-arm/exec.h
+++ b/target-arm/exec.h
@@ -14,17 +14,12 @@
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "dyngen-exec.h"
register struct CPUARMState *env asm(AREG0);
-register uint32_t T0 asm(AREG1);
-register uint32_t T1 asm(AREG2);
-
-#define M0 env->iwmmxt.val
#include "cpu.h"
#include "exec-all.h"
@@ -61,3 +56,8 @@ static inline int cpu_halted(CPUState *env) {
#endif
void raise_exception(int);
+static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
+{
+ env->regs[15] = tb->pc;
+}
+
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 154aa46..3731029 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,11 +5,21 @@
#include "cpu.h"
#include "exec-all.h"
#include "gdbstub.h"
-#include "helpers.h"
+#include "helper.h"
#include "qemu-common.h"
+#include "host-utils.h"
+#if !defined(CONFIG_USER_ONLY)
+//#include "hw/loader.h"
#ifdef CONFIG_TRACE
#include "android-trace.h"
#endif
+#endif
+
+static uint32_t cortexa9_cp15_c0_c1[8] =
+{ 0x1031, 0x11, 0x000, 0, 0x00100103, 0x20000000, 0x01230000, 0x00002111 };
+
+static uint32_t cortexa9_cp15_c0_c2[8] =
+{ 0x00101111, 0x13112111, 0x21232041, 0x11112131, 0x00111142, 0, 0, 0 };
static uint32_t cortexa8_cp15_c0_c1[8] =
{ 0x1031, 0x11, 0x400, 0, 0x31100003, 0x20000000, 0x01202000, 0x11 };
@@ -17,6 +27,9 @@ static uint32_t cortexa8_cp15_c0_c1[8] =
static uint32_t cortexa8_cp15_c0_c2[8] =
{ 0x00101111, 0x12112111, 0x21232031, 0x11112131, 0x00111142, 0, 0, 0 };
+static uint32_t cortexa8r2_cp15_c0_c2[8] =
+{ 0x00101111, 0x12112111, 0x21232031, 0x11112131, 0x00011142, 0, 0, 0 };
+
static uint32_t mpcore_cp15_c0_c1[8] =
{ 0x111, 0x1, 0, 0x2, 0x01100103, 0x10020302, 0x01222000, 0 };
@@ -41,17 +54,23 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
env->cp15.c0_cpuid = id;
switch (id) {
case ARM_CPUID_ARM926:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_VFP);
env->vfp.xregs[ARM_VFP_FPSID] = 0x41011090;
env->cp15.c0_cachetype = 0x1dd20d2;
env->cp15.c1_sys = 0x00090078;
break;
case ARM_CPUID_ARM946:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_MPU);
env->cp15.c0_cachetype = 0x0f004006;
env->cp15.c1_sys = 0x00000078;
break;
case ARM_CPUID_ARM1026:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_VFP);
set_feature(env, ARM_FEATURE_AUXCR);
env->vfp.xregs[ARM_VFP_FPSID] = 0x410110a0;
@@ -60,6 +79,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
break;
case ARM_CPUID_ARM1136_R2:
case ARM_CPUID_ARM1136:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_V6);
set_feature(env, ARM_FEATURE_VFP);
set_feature(env, ARM_FEATURE_AUXCR);
@@ -69,8 +90,11 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
memcpy(env->cp15.c0_c1, arm1136_cp15_c0_c1, 8 * sizeof(uint32_t));
memcpy(env->cp15.c0_c2, arm1136_cp15_c0_c2, 8 * sizeof(uint32_t));
env->cp15.c0_cachetype = 0x1dd20d2;
+ env->cp15.c1_sys = 0x00050078;
break;
case ARM_CPUID_ARM11MPCORE:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_V6);
set_feature(env, ARM_FEATURE_V6K);
set_feature(env, ARM_FEATURE_VFP);
@@ -83,6 +107,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
env->cp15.c0_cachetype = 0x1dd20d2;
break;
case ARM_CPUID_CORTEXA8:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_V6);
set_feature(env, ARM_FEATURE_V6K);
set_feature(env, ARM_FEATURE_V7);
@@ -92,6 +118,7 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
set_feature(env, ARM_FEATURE_VFP3);
set_feature(env, ARM_FEATURE_NEON);
set_feature(env, ARM_FEATURE_THUMB2EE);
+ set_feature(env, ARM_FEATURE_TRUSTZONE);
env->vfp.xregs[ARM_VFP_FPSID] = 0x410330c0;
env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
env->vfp.xregs[ARM_VFP_MVFR1] = 0x00011100;
@@ -102,8 +129,66 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
env->cp15.c0_ccsid[0] = 0xe007e01a; /* 16k L1 dcache. */
env->cp15.c0_ccsid[1] = 0x2007e01a; /* 16k L1 icache. */
env->cp15.c0_ccsid[2] = 0xf0000000; /* No L2 icache. */
+ env->cp15.c1_sys = 0x00c50078;
+ break;
+ case ARM_CPUID_CORTEXA8_R2:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
+ set_feature(env, ARM_FEATURE_V6);
+ set_feature(env, ARM_FEATURE_V6K);
+ set_feature(env, ARM_FEATURE_V7);
+ set_feature(env, ARM_FEATURE_AUXCR);
+ set_feature(env, ARM_FEATURE_THUMB2);
+ set_feature(env, ARM_FEATURE_VFP);
+ set_feature(env, ARM_FEATURE_VFP3);
+ set_feature(env, ARM_FEATURE_NEON);
+ set_feature(env, ARM_FEATURE_THUMB2EE);
+ set_feature(env, ARM_FEATURE_TRUSTZONE);
+ env->vfp.xregs[ARM_VFP_FPSID] = 0x410330c2;
+ env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
+ env->vfp.xregs[ARM_VFP_MVFR1] = 0x00011111;
+ memcpy(env->cp15.c0_c1, cortexa8_cp15_c0_c1, 8 * sizeof(uint32_t));
+ memcpy(env->cp15.c0_c2, cortexa8r2_cp15_c0_c2, 8 * sizeof(uint32_t));
+ env->cp15.c0_cachetype = 0x82048004;
+ env->cp15.c0_clid = (1 << 27) | (2 << 24) | (4 << 3) | 3;
+ env->cp15.c0_ccsid[0] = 0xe007e01a; /* 16k L1 dcache. */
+ env->cp15.c0_ccsid[1] = 0x2007e01a; /* 16k L1 icache. */
+ env->cp15.c0_ccsid[2] = 0xf03fe03a; /* 256k L2 cache. */
+ env->cp15.c1_sys = 0x00c50078;
+ break;
+ case ARM_CPUID_CORTEXA9:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
+ set_feature(env, ARM_FEATURE_V6);
+ set_feature(env, ARM_FEATURE_V6K);
+ set_feature(env, ARM_FEATURE_V7);
+ set_feature(env, ARM_FEATURE_AUXCR);
+ set_feature(env, ARM_FEATURE_THUMB2);
+ set_feature(env, ARM_FEATURE_VFP);
+ set_feature(env, ARM_FEATURE_VFP3);
+ set_feature(env, ARM_FEATURE_VFP_FP16);
+ set_feature(env, ARM_FEATURE_NEON);
+ set_feature(env, ARM_FEATURE_THUMB2EE);
+ /* Note that A9 supports the MP extensions even for
+ * A9UP and single-core A9MP (which are both different
+ * and valid configurations; we don't model A9UP).
+ */
+ set_feature(env, ARM_FEATURE_V7MP);
+ set_feature(env, ARM_FEATURE_TRUSTZONE);
+ env->vfp.xregs[ARM_VFP_FPSID] = 0x41034000; /* Guess */
+ env->vfp.xregs[ARM_VFP_MVFR0] = 0x11110222;
+ env->vfp.xregs[ARM_VFP_MVFR1] = 0x01111111;
+ memcpy(env->cp15.c0_c1, cortexa9_cp15_c0_c1, 8 * sizeof(uint32_t));
+ memcpy(env->cp15.c0_c2, cortexa9_cp15_c0_c2, 8 * sizeof(uint32_t));
+ env->cp15.c0_cachetype = 0x80038003;
+ env->cp15.c0_clid = (1 << 27) | (1 << 24) | 3;
+ env->cp15.c0_ccsid[0] = 0xe00fe015; /* 16k L1 dcache. */
+ env->cp15.c0_ccsid[1] = 0x200fe015; /* 16k L1 icache. */
+ env->cp15.c1_sys = 0x00c50078;
break;
case ARM_CPUID_CORTEXM3:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_V6);
set_feature(env, ARM_FEATURE_THUMB2);
set_feature(env, ARM_FEATURE_V7);
@@ -111,18 +196,23 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
set_feature(env, ARM_FEATURE_DIV);
break;
case ARM_CPUID_ANY: /* For userspace emulation. */
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_V6);
set_feature(env, ARM_FEATURE_V6K);
set_feature(env, ARM_FEATURE_V7);
set_feature(env, ARM_FEATURE_THUMB2);
set_feature(env, ARM_FEATURE_VFP);
set_feature(env, ARM_FEATURE_VFP3);
+ set_feature(env, ARM_FEATURE_VFP_FP16);
set_feature(env, ARM_FEATURE_NEON);
set_feature(env, ARM_FEATURE_THUMB2EE);
set_feature(env, ARM_FEATURE_DIV);
+ set_feature(env, ARM_FEATURE_V7MP);
break;
case ARM_CPUID_TI915T:
case ARM_CPUID_TI925T:
+ set_feature(env, ARM_FEATURE_V4T);
set_feature(env, ARM_FEATURE_OMAPCP);
env->cp15.c0_cpuid = ARM_CPUID_TI925T; /* Depends on wiring. */
env->cp15.c0_cachetype = 0x5109149;
@@ -135,6 +225,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
case ARM_CPUID_PXA260:
case ARM_CPUID_PXA261:
case ARM_CPUID_PXA262:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_XSCALE);
/* JTAG_ID is ((id << 28) | 0x09265013) */
env->cp15.c0_cachetype = 0xd172172;
@@ -146,6 +238,8 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
case ARM_CPUID_PXA270_B1:
case ARM_CPUID_PXA270_C0:
case ARM_CPUID_PXA270_C5:
+ set_feature(env, ARM_FEATURE_V4T);
+ set_feature(env, ARM_FEATURE_V5);
set_feature(env, ARM_FEATURE_XSCALE);
/* JTAG_ID is ((id << 28) | 0x09265013) */
set_feature(env, ARM_FEATURE_IWMMXT);
@@ -153,6 +247,11 @@ static void cpu_reset_model_id(CPUARMState *env, uint32_t id)
env->cp15.c0_cachetype = 0xd172172;
env->cp15.c1_sys = 0x00000078;
break;
+ case ARM_CPUID_SA1100:
+ case ARM_CPUID_SA1110:
+ set_feature(env, ARM_FEATURE_STRONGARM);
+ env->cp15.c1_sys = 0x00000070;
+ break;
default:
cpu_abort(env, "Bad CPU ID: %x\n", id);
break;
@@ -172,20 +271,52 @@ void cpu_reset(CPUARMState *env)
memset(env, 0, offsetof(CPUARMState, breakpoints));
if (id)
cpu_reset_model_id(env, id);
+ /* DBGDIDR : we implement nothing, and just mirror the main ID
+ * register's Variant and Revision fields.
+ */
+ env->cp14_dbgdidr = (id >> 16 & 0xf0) | 0xf;
#if defined (CONFIG_USER_ONLY)
env->uncached_cpsr = ARM_CPU_MODE_USR;
+ /* For user mode we must enable access to coprocessors */
env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30;
+ if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
+ env->cp15.c15_cpar = 3;
+ } else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ env->cp15.c15_cpar = 1;
+ }
#else
/* SVC mode with interrupts disabled. */
env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
/* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
- clear at reset. */
- if (IS_M(env))
+ clear at reset. Initial SP and PC are loaded from ROM. */
+ if (IS_M(env)) {
+ uint32_t pc;
+ uint8_t *rom;
env->uncached_cpsr &= ~CPSR_I;
+#ifndef CONFIG_ANDROID /* No hw/loader.h and no ROM support for now on Android */
+ rom = rom_ptr(0);
+ if (rom) {
+ /* We should really use ldl_phys here, in case the guest
+ modified flash and reset itself. However images
+ loaded via -kernel have not been copied yet, so load the
+ values directly from there. */
+ env->regs[13] = ldl_p(rom);
+ pc = ldl_p(rom + 4);
+ env->thumb = pc & 1;
+ env->regs[15] = pc & ~1;
+ }
+#endif
+ }
env->vfp.xregs[ARM_VFP_FPEXC] = 0;
env->cp15.c2_base_mask = 0xffffc000u;
#endif
- env->regs[15] = 0;
+ set_flush_to_zero(1, &env->vfp.standard_fp_status);
+ set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
+ set_default_nan_mode(1, &env->vfp.standard_fp_status);
+ set_float_detect_tininess(float_tininess_before_rounding,
+ &env->vfp.fp_status);
+ set_float_detect_tininess(float_tininess_before_rounding,
+ &env->vfp.standard_fp_status);
tlb_flush(env, 1);
}
@@ -236,7 +367,7 @@ static int vfp_gdb_set_reg(CPUState *env, uint8_t *buf, int reg)
switch (reg - nregs) {
case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
- case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf); return 4;
+ case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
}
return 0;
}
@@ -288,8 +419,12 @@ static const struct arm_cpu_t arm_cpu_names[] = {
{ ARM_CPUID_ARM11MPCORE, "arm11mpcore"},
{ ARM_CPUID_CORTEXM3, "cortex-m3"},
{ ARM_CPUID_CORTEXA8, "cortex-a8"},
+ { ARM_CPUID_CORTEXA8_R2, "cortex-a8-r2"},
+ { ARM_CPUID_CORTEXA9, "cortex-a9"},
{ ARM_CPUID_TI925T, "ti925t" },
{ ARM_CPUID_PXA250, "pxa250" },
+ { ARM_CPUID_SA1100, "sa1100" },
+ { ARM_CPUID_SA1110, "sa1110" },
{ ARM_CPUID_PXA255, "pxa255" },
{ ARM_CPUID_PXA260, "pxa260" },
{ ARM_CPUID_PXA261, "pxa261" },
@@ -305,7 +440,7 @@ static const struct arm_cpu_t arm_cpu_names[] = {
{ 0, NULL}
};
-void arm_cpu_list(FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
{
int i;
@@ -397,16 +532,15 @@ uint32_t HELPER(uxtb16)(uint32_t x)
uint32_t HELPER(clz)(uint32_t x)
{
- int count;
- for (count = 32; x; count--)
- x >>= 1;
- return count;
+ return clz32(x);
}
int32_t HELPER(sdiv)(int32_t num, int32_t den)
{
if (den == 0)
return 0;
+ if (num == INT_MIN && den == -1)
+ return INT_MIN;
return num / den;
}
@@ -444,16 +578,6 @@ void do_interrupt (CPUState *env)
env->exception_index = -1;
}
-/* Structure used to record exclusive memory locations. */
-typedef struct mmon_state {
- struct mmon_state *next;
- CPUARMState *cpu_env;
- uint32_t addr;
-} mmon_state;
-
-/* Chain of current locks. */
-static mmon_state* mmon_head = NULL;
-
int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
int mmu_idx, int is_softmmu)
{
@@ -467,82 +591,7 @@ int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
return 1;
}
-static void allocate_mmon_state(CPUState *env)
-{
- env->mmon_entry = malloc(sizeof (mmon_state));
- memset (env->mmon_entry, 0, sizeof (mmon_state));
- env->mmon_entry->cpu_env = env;
- mmon_head = env->mmon_entry;
-}
-
-/* Flush any monitor locks for the specified address. */
-static void flush_mmon(uint32_t addr)
-{
- mmon_state *mon;
-
- for (mon = mmon_head; mon; mon = mon->next)
- {
- if (mon->addr != addr)
- continue;
-
- mon->addr = 0;
- break;
- }
-}
-
-/* Mark an address for exclusive access. */
-void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
-{
- if (!env->mmon_entry)
- allocate_mmon_state(env);
- /* Clear any previous locks. */
- flush_mmon(addr);
- env->mmon_entry->addr = addr;
-}
-
-/* Test if an exclusive address is still exclusive. Returns zero
- if the address is still exclusive. */
-uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
-{
- int res;
-
- if (!env->mmon_entry)
- return 1;
- if (env->mmon_entry->addr == addr)
- res = 0;
- else
- res = 1;
- flush_mmon(addr);
- return res;
-}
-
-void HELPER(clrex)(CPUState *env)
-{
- if (!(env->mmon_entry && env->mmon_entry->addr))
- return;
- flush_mmon(env->mmon_entry->addr);
-}
-
-target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
-{
- return addr;
-}
-
/* These should probably raise undefined insn exceptions. */
-void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
-{
- int op1 = (insn >> 8) & 0xf;
- cpu_abort(env, "cp%i insn %08x\n", op1, insn);
- return;
-}
-
-uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
-{
- int op1 = (insn >> 8) & 0xf;
- cpu_abort(env, "cp%i insn %08x\n", op1, insn);
- return 0;
-}
-
void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
{
cpu_abort(env, "cp15 insn %08x\n", insn);
@@ -604,6 +653,8 @@ static inline int bank_number (int mode)
return 4;
case ARM_CPU_MODE_FIQ:
return 5;
+ case ARM_CPU_MODE_SMC:
+ return 6;
}
cpu_abort(cpu_single_env, "Bad mode %x\n", mode);
return -1;
@@ -670,7 +721,7 @@ static void do_v7m_exception_exit(CPUARMState *env)
type = env->regs[15];
if (env->v7m.exception != 0)
- armv7m_nvic_complete_irq(env->v7m.nvic, env->v7m.exception);
+ armv7m_nvic_complete_irq(env->nvic, env->v7m.exception);
/* Switch to the target stack. */
switch_v7m_sp(env, (type & 4) != 0);
@@ -712,15 +763,15 @@ static void do_interrupt_v7m(CPUARMState *env)
one we're raising. */
switch (env->exception_index) {
case EXCP_UDEF:
- armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_USAGE);
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE);
return;
case EXCP_SWI:
env->regs[15] += 2;
- armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_SVC);
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC);
return;
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
- armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_MEM);
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
return;
case EXCP_BKPT:
if (semihosting_enabled) {
@@ -732,10 +783,10 @@ static void do_interrupt_v7m(CPUARMState *env)
return;
}
}
- armv7m_nvic_set_pending(env->v7m.nvic, ARMV7M_EXCP_DEBUG);
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG);
return;
case EXCP_IRQ:
- env->v7m.exception = armv7m_nvic_acknowledge_irq(env->v7m.nvic);
+ env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic);
break;
case EXCP_EXCEPTION_EXIT:
do_v7m_exception_exit(env);
@@ -859,23 +910,52 @@ void do_interrupt(CPUARMState *env)
mask = CPSR_A | CPSR_I | CPSR_F;
offset = 4;
break;
+ case EXCP_SMC:
+ if (semihosting_enabled) {
+ cpu_abort(env, "SMC handling under semihosting not implemented\n");
+ return;
+ }
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SMC) {
+ env->cp15.c1_secfg &= ~1;
+ }
+ offset = env->thumb ? 2 : 0;
+ new_mode = ARM_CPU_MODE_SMC;
+ addr = 0x08;
+ mask = CPSR_A | CPSR_I | CPSR_F;
+ break;
default:
cpu_abort(env, "Unhandled exception 0x%x\n", env->exception_index);
return; /* Never happens. Keep compiler happy. */
}
+ if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+ if (new_mode == ARM_CPU_MODE_SMC ||
+ (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SMC) {
+ addr += env->cp15.c12_mvbar;
+ } else {
+ if (env->cp15.c1_sys & (1 << 13)) {
+ addr += 0xffff0000;
+ } else {
+ addr += env->cp15.c12_vbar;
+ }
+ }
+ } else {
/* High vectors. */
if (env->cp15.c1_sys & (1 << 13)) {
addr += 0xffff0000;
+ }
}
switch_mode (env, new_mode);
env->spsr = cpsr_read(env);
/* Clear IT bits. */
env->condexec_bits = 0;
- /* Switch to the new mode, and switch to Arm mode. */
- /* ??? Thumb interrupt handlers not implemented. */
+ /* Switch to the new mode, and to the correct instruction set. */
env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
env->uncached_cpsr |= mask;
- env->thumb = 0;
+ /* this is a lie, as there was no c1_sys on V4T/V5, but who cares
+ * and we should just guard the thumb mode on V4 */
+ if (arm_feature(env, ARM_FEATURE_V4T)) {
+ env->thumb = (env->cp15.c1_sys & (1 << 30)) != 0;
+ }
env->regs[14] = env->regs[15] + offset;
env->regs[15] = addr;
env->interrupt_request |= CPU_INTERRUPT_EXITTB;
@@ -947,7 +1027,8 @@ static uint32_t get_level1_table_address(CPUState *env, uint32_t address)
}
static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
- int is_user, uint32_t *phys_ptr, int *prot)
+ int is_user, uint32_t *phys_ptr, int *prot,
+ target_ulong *page_size)
{
int code;
uint32_t table;
@@ -980,6 +1061,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
ap = (desc >> 10) & 3;
code = 13;
+ *page_size = 1024 * 1024;
} else {
/* Lookup l2 entry. */
if (type == 1) {
@@ -997,10 +1079,12 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
case 1: /* 64k page. */
phys_addr = (desc & 0xffff0000) | (address & 0xffff);
ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+ *page_size = 0x10000;
break;
case 2: /* 4k page. */
phys_addr = (desc & 0xfffff000) | (address & 0xfff);
ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+ *page_size = 0x1000;
break;
case 3: /* 1k page. */
if (type == 1) {
@@ -1015,6 +1099,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
}
ap = (desc >> 4) & 3;
+ *page_size = 0x400;
break;
default:
/* Never happens, but compiler isn't smart enough to tell. */
@@ -1027,6 +1112,7 @@ static int get_phys_addr_v5(CPUState *env, uint32_t address, int access_type,
/* Access permission fault. */
goto do_fault;
}
+ *prot |= PAGE_EXEC;
*phys_ptr = phys_addr;
return 0;
do_fault:
@@ -1034,7 +1120,8 @@ do_fault:
}
static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
- int is_user, uint32_t *phys_ptr, int *prot)
+ int is_user, uint32_t *phys_ptr, int *prot,
+ target_ulong *page_size)
{
int code;
uint32_t table;
@@ -1050,7 +1137,7 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
table = get_level1_table_address(env, address);
desc = ldl_phys(table);
type = (desc & 3);
- if (type == 0) {
+ if (type == 0 || type == 3) {
/* Section translation fault. */
code = 5;
domain = 0;
@@ -1074,9 +1161,11 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
if (desc & (1 << 18)) {
/* Supersection. */
phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
+ *page_size = 0x1000000;
} else {
/* Section. */
phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
+ *page_size = 0x100000;
}
ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
xn = desc & (1 << 4);
@@ -1093,10 +1182,12 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
case 1: /* 64k page. */
phys_addr = (desc & 0xffff0000) | (address & 0xffff);
xn = desc & (1 << 15);
+ *page_size = 0x10000;
break;
case 2: case 3: /* 4k page. */
phys_addr = (desc & 0xfffff000) | (address & 0xfff);
xn = desc & 1;
+ *page_size = 0x1000;
break;
default:
/* Never happens, but compiler isn't smart enough to tell. */
@@ -1104,6 +1195,9 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
}
code = 15;
}
+ if (domain == 3) {
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ } else {
if (xn && access_type == 2)
goto do_fault;
@@ -1117,6 +1211,10 @@ static int get_phys_addr_v6(CPUState *env, uint32_t address, int access_type,
if (!*prot) {
/* Access permission fault. */
goto do_fault;
+ }
+ if (!xn) {
+ *prot |= PAGE_EXEC;
+ }
}
*phys_ptr = phys_addr;
return 0;
@@ -1180,12 +1278,22 @@ static int get_phys_addr_mpu(CPUState *env, uint32_t address, int access_type,
/* Bad permission. */
return 1;
}
+ *prot |= PAGE_EXEC;
return 0;
}
-static inline int get_phys_addr(CPUState *env, uint32_t address,
+#ifdef CONFIG_GLES2
+int get_phys_addr(CPUState *env, uint32_t address,
+ int access_type, int is_user,
+ uint32_t *phys_ptr, int *prot,
+ target_ulong *page_size);
+#else
+static
+#endif
+int get_phys_addr(CPUState *env, uint32_t address,
int access_type, int is_user,
- uint32_t *phys_ptr, int *prot)
+ uint32_t *phys_ptr, int *prot,
+ target_ulong *page_size)
{
/* Fast Context Switch Extension. */
if (address < 0x02000000)
@@ -1194,35 +1302,39 @@ static inline int get_phys_addr(CPUState *env, uint32_t address,
if ((env->cp15.c1_sys & 1) == 0) {
/* MMU/MPU disabled. */
*phys_ptr = address;
- *prot = PAGE_READ | PAGE_WRITE;
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ *page_size = TARGET_PAGE_SIZE;
return 0;
} else if (arm_feature(env, ARM_FEATURE_MPU)) {
+ *page_size = TARGET_PAGE_SIZE;
return get_phys_addr_mpu(env, address, access_type, is_user, phys_ptr,
prot);
} else if (env->cp15.c1_sys & (1 << 23)) {
return get_phys_addr_v6(env, address, access_type, is_user, phys_ptr,
- prot);
+ prot, page_size);
} else {
return get_phys_addr_v5(env, address, access_type, is_user, phys_ptr,
- prot);
+ prot, page_size);
}
}
int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
int access_type, int mmu_idx, int is_softmmu)
{
- uint32_t phys_addr = 0;
+ uint32_t phys_addr;
+ target_ulong page_size;
int prot;
int ret, is_user;
is_user = mmu_idx == MMU_USER_IDX;
- ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot);
+ ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
+ &page_size);
if (ret == 0) {
/* Map a single [sub]page. */
phys_addr &= ~(uint32_t)0x3ff;
address &= ~(uint32_t)0x3ff;
- return tlb_set_page (env, address, phys_addr, prot, mmu_idx,
- is_softmmu);
+ tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
+ return 0;
}
if (access_type == 2) {
@@ -1241,11 +1353,12 @@ int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address,
target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
{
- uint32_t phys_addr = 0;
+ uint32_t phys_addr;
+ target_ulong page_size;
int prot;
int ret;
- ret = get_phys_addr(env, addr, 0, 0, &phys_addr, &prot);
+ ret = get_phys_addr(env, addr, 0, 0, &phys_addr, &prot, &page_size);
if (ret != 0)
return -1;
@@ -1253,49 +1366,6 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
return phys_addr;
}
-/* Not really implemented. Need to figure out a sane way of doing this.
- Maybe add generic watchpoint support and use that. */
-
-void HELPER(mark_exclusive)(CPUState *env, uint32_t addr)
-{
- env->mmon_addr = addr;
-}
-
-uint32_t HELPER(test_exclusive)(CPUState *env, uint32_t addr)
-{
- return (env->mmon_addr != addr);
-}
-
-void HELPER(clrex)(CPUState *env)
-{
- env->mmon_addr = -1;
-}
-
-void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
-{
- int cp_num = (insn >> 8) & 0xf;
- int cp_info = (insn >> 5) & 7;
- int src = (insn >> 16) & 0xf;
- int operand = insn & 0xf;
-
- if (env->cp[cp_num].cp_write)
- env->cp[cp_num].cp_write(env->cp[cp_num].opaque,
- cp_info, src, operand, val);
-}
-
-uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
-{
- int cp_num = (insn >> 8) & 0xf;
- int cp_info = (insn >> 5) & 7;
- int dest = (insn >> 16) & 0xf;
- int operand = insn & 0xf;
-
- if (env->cp[cp_num].cp_read)
- return env->cp[cp_num].cp_read(env->cp[cp_num].opaque,
- cp_info, dest, operand);
- return 0;
-}
-
/* Return basic MPU access permission bits. */
static uint32_t simple_mpu_ap_bits(uint32_t val)
{
@@ -1349,17 +1419,19 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
}
goto bad_reg;
case 1: /* System configuration. */
+ switch (crm) {
+ case 0:
if (arm_feature(env, ARM_FEATURE_OMAPCP))
op2 = 0;
switch (op2) {
case 0:
- if (!arm_feature(env, ARM_FEATURE_XSCALE) || crm == 0)
+ if (!arm_feature(env, ARM_FEATURE_XSCALE))
env->cp15.c1_sys = val;
/* ??? Lots of these bits are not implemented. */
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(env, 1);
break;
- case 1: /* Auxiliary cotrol register. */
+ case 1: /* Auxiliary control register. */
if (arm_feature(env, ARM_FEATURE_XSCALE)) {
env->cp15.c1_xscaleauxcr = val;
break;
@@ -1373,6 +1445,34 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
env->cp15.c1_coproc = val;
/* ??? Is this safe when called from within a TB? */
tb_flush(env);
+ }
+ break;
+ default:
+ goto bad_reg;
+ }
+ break;
+ case 1:
+ if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)
+ || (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+ goto bad_reg;
+ switch (op2) {
+ case 0: /* Secure configuration register. */
+ if (env->cp15.c1_secfg & 1)
+ goto bad_reg;
+ env->cp15.c1_secfg = val;
+ break;
+ case 1: /* Secure debug enable register. */
+ if (env->cp15.c1_secfg & 1)
+ goto bad_reg;
+ env->cp15.c1_sedbg = val;
+ break;
+ case 2: /* Nonsecure access control register. */
+ if (env->cp15.c1_secfg & 1)
+ goto bad_reg;
+ env->cp15.c1_nseac = val;
+ break;
+ default:
+ goto bad_reg;
}
break;
default:
@@ -1468,8 +1568,49 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
case 7: /* Cache control. */
env->cp15.c15_i_max = 0x000;
env->cp15.c15_i_min = 0xff0;
- /* No cache, so nothing to do. */
- /* ??? MPCore has VA to PA translation functions. */
+ if (op1 != 0) {
+ goto bad_reg;
+ }
+ /* No cache, so nothing to do except VA->PA translations. */
+ if (arm_feature(env, ARM_FEATURE_V6K)) {
+ switch (crm) {
+ case 4:
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ env->cp15.c7_par = val & 0xfffff6ff;
+ } else {
+ env->cp15.c7_par = val & 0xfffff1ff;
+ }
+ break;
+ case 8: {
+ uint32_t phys_addr;
+ target_ulong page_size;
+ int prot;
+ int ret, is_user = op2 & 2;
+ int access_type = op2 & 1;
+
+ if (op2 & 4) {
+ /* Other states are only available with TrustZone */
+ goto bad_reg;
+ }
+ ret = get_phys_addr(env, val, access_type, is_user,
+ &phys_addr, &prot, &page_size);
+ if (ret == 0) {
+ /* We do not set any attribute bits in the PAR */
+ if (page_size == (1 << 24)
+ && arm_feature(env, ARM_FEATURE_V7)) {
+ env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+ } else {
+ env->cp15.c7_par = phys_addr & 0xfffff000;
+ }
+ } else {
+ env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
+ ((ret & (12 << 1)) >> 6) |
+ ((ret & 0xf) << 1) | 1;
+ }
+ break;
+ }
+ }
+ }
break;
case 8: /* MMU TLB control. */
switch (op2) {
@@ -1477,18 +1618,7 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
tlb_flush(env, 0);
break;
case 1: /* Invalidate single TLB entry. */
-#if 0
- /* ??? This is wrong for large pages and sections. */
- /* As an ugly hack to make linux work we always flush a 4K
- pages. */
- val &= 0xfffff000;
- tlb_flush_page(env, val);
- tlb_flush_page(env, val + 0x400);
- tlb_flush_page(env, val + 0x800);
- tlb_flush_page(env, val + 0xc00);
-#else
- tlb_flush(env, 1);
-#endif
+ tlb_flush_page(env, val & TARGET_PAGE_MASK);
break;
case 2: /* Invalidate on ASID. */
tlb_flush(env, val == 0);
@@ -1504,6 +1634,8 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
case 9:
if (arm_feature(env, ARM_FEATURE_OMAPCP))
break;
+ if (arm_feature(env, ARM_FEATURE_STRONGARM))
+ break; /* Ignore ReadBuffer access */
switch (crm) {
case 0: /* Cache lockdown. */
switch (op1) {
@@ -1520,15 +1652,86 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
}
break;
case 1: /* L2 cache. */
- /* Ignore writes to L2 lockdown/auxiliary registers. */
+ switch (op2) {
+ case 0: /* L2 cache lockdown */
+ case 2: /* L2 cache auxiliary control */
+ /* ignore */
+ break;
+ default:
+ goto bad_reg;
+ }
break;
default:
goto bad_reg;
}
break;
case 1: /* TCM memory region registers. */
+ case 2:
/* Not implemented. */
goto bad_reg;
+ case 12: /* performance monitor control */
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ switch (op2) {
+ case 0: /* performance monitor control */
+ env->cp15.c9_pmcr_data = val;
+ break;
+ case 1: /* count enable set */
+ case 2: /* count enable clear */
+ case 3: /* overflow flag status */
+ case 4: /* software increment */
+ case 5: /* performance counter selection */
+ /* not implemented */
+ goto bad_reg;
+ default:
+ goto bad_reg;
+ }
+ } else {
+ goto bad_reg;
+ }
+ break;
+ case 13: /* performance counters */
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ switch (op2) {
+ case 0: /* cycle count */
+ case 1: /* event selection */
+ case 2: /* performance monitor count */
+ /* not implemented */
+ goto bad_reg;
+ default:
+ goto bad_reg;
+ }
+ } else {
+ goto bad_reg;
+ }
+ break;
+ case 14: /* performance monitor control */
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ switch (op2) {
+ case 0: /* user enable */
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+ goto bad_reg;
+ }
+ env->cp15.c9_useren = val & 1;
+ break;
+ case 1: /* interrupt enable set */
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+ goto bad_reg;
+ }
+ env->cp15.c9_inten |= val & 0xf;
+ break;
+ case 2: /* interrupt enable clear */
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+ goto bad_reg;
+ }
+ env->cp15.c9_inten &= ~(val & 0xf);
+ break;
+ default:
+ goto bad_reg;
+ }
+ } else {
+ goto bad_reg;
+ }
+ break;
default:
goto bad_reg;
}
@@ -1537,6 +1740,27 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
/* ??? TLB lockdown not implemented. */
break;
case 12: /* Reserved. */
+ if (!op1 && !crm) {
+ switch (op2) {
+ case 0:
+ if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+ goto bad_reg;
+ }
+ env->cp15.c12_vbar = val & ~0x1f;
+ break;
+ case 1:
+ if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+ goto bad_reg;
+ }
+ if (!(env->cp15.c1_secfg & 1)) {
+ env->cp15.c12_mvbar = val & ~0x1f;
+ }
+ break;
+ default:
+ goto bad_reg;
+ }
+ break;
+ }
goto bad_reg;
case 13: /* Process ID. */
switch (op2) {
@@ -1555,15 +1779,6 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
tlb_flush(env, 0);
env->cp15.c13_context = val;
break;
- case 2:
- env->cp15.c13_tls1 = val;
- break;
- case 3:
- env->cp15.c13_tls2 = val;
- break;
- case 4:
- env->cp15.c13_tls3 = val;
- break;
default:
goto bad_reg;
}
@@ -1640,8 +1855,28 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
return 0;
case 3: /* TLB type register. */
return 0; /* No lockable TLB entries. */
- case 5: /* CPU ID */
- return env->cpu_index;
+ case 5: /* MPIDR */
+ /* The MPIDR was standardised in v7; prior to
+ * this it was implemented only in the 11MPCore.
+ * For all other pre-v7 cores it does not exist.
+ */
+ if (arm_feature(env, ARM_FEATURE_V7) ||
+ ARM_CPUID(env) == ARM_CPUID_ARM11MPCORE) {
+ int mpidr = env->cpu_index;
+ /* We don't support setting cluster ID ([8..11])
+ * so these bits always RAZ.
+ */
+ if (arm_feature(env, ARM_FEATURE_V7MP)) {
+ mpidr |= (1 << 31);
+ /* Cores which are uniprocessor (non-coherent)
+ * but still implement the MP extensions set
+ * bit 30. (For instance, A9UP.) However we do
+ * not currently model any of those cores.
+ */
+ }
+ return mpidr;
+ }
+ /* otherwise fall through to the unimplemented-reg case */
default:
goto bad_reg;
}
@@ -1658,6 +1893,7 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
default:
goto bad_reg;
}
+ break;
case 1:
/* These registers aren't documented on arm11 cores. However
Linux looks at them anyway. */
@@ -1684,7 +1920,10 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
default:
goto bad_reg;
}
+ break;
case 1: /* System configuration. */
+ switch (crm) {
+ case 0:
if (arm_feature(env, ARM_FEATURE_OMAPCP))
op2 = 0;
switch (op2) {
@@ -1704,10 +1943,14 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
case ARM_CPUID_ARM11MPCORE:
return 1;
case ARM_CPUID_CORTEXA8:
+ case ARM_CPUID_CORTEXA8_R2:
return 2;
+ case ARM_CPUID_CORTEXA9:
+ return 0;
default:
goto bad_reg;
}
+ break;
case 2: /* Coprocessor access register. */
if (arm_feature(env, ARM_FEATURE_XSCALE))
goto bad_reg;
@@ -1715,6 +1958,30 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
default:
goto bad_reg;
}
+ break;
+ case 1:
+ if (!arm_feature(env, ARM_FEATURE_TRUSTZONE)
+ || (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)
+ goto bad_reg;
+ switch (op2) {
+ case 0: /* Secure configuration register. */
+ if (env->cp15.c1_secfg & 1)
+ goto bad_reg;
+ return env->cp15.c1_secfg;
+ case 1: /* Secure debug enable register. */
+ if (env->cp15.c1_secfg & 1)
+ goto bad_reg;
+ return env->cp15.c1_sedbg;
+ case 2: /* Nonsecure access control register. */
+ return env->cp15.c1_nseac;
+ default:
+ goto bad_reg;
+ }
+ break;
+ default:
+ goto bad_reg;
+ }
+ break;
case 2: /* MMU Page table control / MPU cache control. */
if (arm_feature(env, ARM_FEATURE_MPU)) {
switch (op2) {
@@ -1781,34 +2048,37 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
if (arm_feature(env, ARM_FEATURE_V6)) {
/* Watchpoint Fault Address. */
return 0; /* Not implemented. */
- } else {
+ }
/* Instruction Fault Address. */
/* Arm9 doesn't have an IFAR, but implementing it anyway
shouldn't do any harm. */
return env->cp15.c6_insn;
- }
case 2:
if (arm_feature(env, ARM_FEATURE_V6)) {
/* Instruction Fault Address. */
return env->cp15.c6_insn;
- } else {
- goto bad_reg;
}
+ goto bad_reg;
default:
goto bad_reg;
}
}
case 7: /* Cache control. */
- /* FIXME: Should only clear Z flag if destination is r15. */
+ if (crm == 4 && op1 == 0 && op2 == 0) {
+ return env->cp15.c7_par;
+ }
+ if (((insn >> 12) & 0xf) == 0xf) /* clear ZF only if destination is r15 */
env->ZF = 0;
return 0;
case 8: /* MMU TLB control. */
goto bad_reg;
case 9: /* Cache lockdown. */
switch (op1) {
- case 0: /* L1 cache. */
+ case 0:
if (arm_feature(env, ARM_FEATURE_OMAPCP))
return 0;
+ switch (crm) {
+ case 0: /* L1 cache */
switch (op2) {
case 0:
return env->cp15.c9_data;
@@ -1817,6 +2087,37 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
default:
goto bad_reg;
}
+ break;
+ case 12:
+ switch (op2) {
+ case 0:
+ return env->cp15.c9_pmcr_data;
+ default:
+ goto bad_reg;
+ }
+ break;
+ case 14: /* performance monitor control */
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ switch (op2) {
+ case 0: /* user enable */
+ return env->cp15.c9_useren;
+ case 1: /* interrupt enable set */
+ case 2: /* interrupt enable clear */
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
+ goto bad_reg;
+ }
+ return env->cp15.c9_inten;
+ default:
+ goto bad_reg;
+ }
+ } else {
+ goto bad_reg;
+ }
+ break;
+ default:
+ goto bad_reg;
+ }
+ break;
case 1: /* L2 cache */
if (crm != 0)
goto bad_reg;
@@ -1830,6 +2131,22 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
return 0;
case 11: /* TCM DMA control. */
case 12: /* Reserved. */
+ if (!op1 && !crm) {
+ switch (op2) {
+ case 0: /* secure or nonsecure vector base address */
+ if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+ return env->cp15.c12_vbar;
+ }
+ break;
+ case 1: /* monitor vector base address */
+ if (arm_feature(env, ARM_FEATURE_TRUSTZONE)) {
+ return env->cp15.c12_mvbar;
+ }
+ break;
+ default:
+ break;
+ }
+ }
goto bad_reg;
case 13: /* Process ID. */
switch (op2) {
@@ -1837,12 +2154,6 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
return env->cp15.c13_fcse;
case 1:
return env->cp15.c13_context;
- case 2:
- return env->cp15.c13_tls1;
- case 3:
- return env->cp15.c13_tls2;
- case 4:
- return env->cp15.c13_tls3;
default:
goto bad_reg;
}
@@ -1887,12 +2198,20 @@ bad_reg:
void HELPER(set_r13_banked)(CPUState *env, uint32_t mode, uint32_t val)
{
+ if ((env->uncached_cpsr & CPSR_M) == mode) {
+ env->regs[13] = val;
+ } else {
env->banked_r13[bank_number(mode)] = val;
}
+}
uint32_t HELPER(get_r13_banked)(CPUState *env, uint32_t mode)
{
+ if ((env->uncached_cpsr & CPSR_M) == mode) {
+ return env->regs[13];
+ } else {
return env->banked_r13[bank_number(mode)];
+ }
}
uint32_t HELPER(v7m_mrs)(CPUState *env, uint32_t reg)
@@ -2101,7 +2420,7 @@ static inline uint16_t add16_usat(uint16_t a, uint16_t b)
static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
{
- if (a < b)
+ if (a > b)
return a - b;
else
return 0;
@@ -2118,7 +2437,7 @@ static inline uint8_t add8_usat(uint8_t a, uint8_t b)
static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
{
- if (a < b)
+ if (a > b)
return a - b;
else
return 0;
@@ -2135,7 +2454,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
/* Signed modulo arithmetic. */
#define SARITH16(a, b, n, op) do { \
int32_t sum; \
- sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \
+ sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
RESULT(sum, n, 16); \
if (sum >= 0) \
ge |= 3 << (n * 2); \
@@ -2143,7 +2462,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
#define SARITH8(a, b, n, op) do { \
int32_t sum; \
- sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \
+ sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
RESULT(sum, n, 8); \
if (sum >= 0) \
ge |= 1 << n; \
@@ -2279,10 +2598,12 @@ static inline int vfp_exceptbits_from_host(int host_bits)
target_bits |= 2;
if (host_bits & float_flag_overflow)
target_bits |= 4;
- if (host_bits & float_flag_underflow)
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal))
target_bits |= 8;
if (host_bits & float_flag_inexact)
target_bits |= 0x10;
+ if (host_bits & float_flag_input_denormal)
+ target_bits |= 0x80;
return target_bits;
}
@@ -2295,10 +2616,16 @@ uint32_t HELPER(vfp_get_fpscr)(CPUState *env)
| (env->vfp.vec_len << 16)
| (env->vfp.vec_stride << 20);
i = get_float_exception_flags(&env->vfp.fp_status);
+ i |= get_float_exception_flags(&env->vfp.standard_fp_status);
fpscr |= vfp_exceptbits_from_host(i);
return fpscr;
}
+uint32_t vfp_get_fpscr(CPUState *env)
+{
+ return HELPER(vfp_get_fpscr)(env);
+}
+
/* Convert vfp exception flags to target form. */
static inline int vfp_exceptbits_to_host(int target_bits)
{
@@ -2314,6 +2641,8 @@ static inline int vfp_exceptbits_to_host(int target_bits)
host_bits |= float_flag_underflow;
if (target_bits & 0x10)
host_bits |= float_flag_inexact;
+ if (target_bits & 0x80)
+ host_bits |= float_flag_input_denormal;
return host_bits;
}
@@ -2346,13 +2675,21 @@ void HELPER(vfp_set_fpscr)(CPUState *env, uint32_t val)
}
set_float_rounding_mode(i, &env->vfp.fp_status);
}
- if (changed & (1 << 24))
+ if (changed & (1 << 24)) {
set_flush_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
+ set_flush_inputs_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status);
+ }
if (changed & (1 << 25))
set_default_nan_mode((val & (1 << 25)) != 0, &env->vfp.fp_status);
- i = vfp_exceptbits_to_host((val >> 8) & 0x1f);
+ i = vfp_exceptbits_to_host(val);
set_float_exception_flags(i, &env->vfp.fp_status);
+ set_float_exception_flags(0, &env->vfp.standard_fp_status);
+}
+
+void vfp_set_fpscr(CPUState *env, uint32_t val)
+{
+ HELPER(vfp_set_fpscr)(env, val);
}
#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
@@ -2432,203 +2769,384 @@ DO_VFP_cmp(s, float32)
DO_VFP_cmp(d, float64)
#undef DO_VFP_cmp
-/* Helper routines to perform bitwise copies between float and int. */
-static inline float32 vfp_itos(uint32_t i)
-{
- union {
- uint32_t i;
- float32 s;
- } v;
+/* Integer to float and float to integer conversions */
- v.i = i;
- return v.s;
+#define CONV_ITOF(name, fsz, sign) \
+ float##fsz HELPER(name)(uint32_t x, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return sign##int32_to_##float##fsz(x, fpst); \
}
-static inline uint32_t vfp_stoi(float32 s)
-{
- union {
- uint32_t i;
- float32 s;
- } v;
-
- v.s = s;
- return v.i;
+#define CONV_FTOI(name, fsz, sign, round) \
+uint32_t HELPER(name)(float##fsz x, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ if (float##fsz##_is_any_nan(x)) { \
+ float_raise(float_flag_invalid, fpst); \
+ return 0; \
+ } \
+ return float##fsz##_to_##sign##int32##round(x, fpst); \
}
-static inline float64 vfp_itod(uint64_t i)
-{
- union {
- uint64_t i;
- float64 d;
- } v;
-
- v.i = i;
- return v.d;
-}
+#define FLOAT_CONVS(name, p, fsz, sign) \
+CONV_ITOF(vfp_##name##to##p, fsz, sign) \
+CONV_FTOI(vfp_to##name##p, fsz, sign, ) \
+CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero)
-static inline uint64_t vfp_dtoi(float64 d)
-{
- union {
- uint64_t i;
- float64 d;
- } v;
+FLOAT_CONVS(si, s, 32, )
+FLOAT_CONVS(si, d, 64, )
+FLOAT_CONVS(ui, s, 32, u)
+FLOAT_CONVS(ui, d, 64, u)
- v.d = d;
- return v.i;
-}
+#undef CONV_ITOF
+#undef CONV_FTOI
+#undef FLOAT_CONVS
-/* Integer to float conversion. */
-float32 VFP_HELPER(uito, s)(float32 x, CPUState *env)
+/* floating point conversion */
+float64 VFP_HELPER(fcvtd, s)(float32 x, CPUState *env)
{
- return uint32_to_float32(vfp_stoi(x), &env->vfp.fp_status);
+ float64 r = float32_to_float64(x, &env->vfp.fp_status);
+ /* ARM requires that S<->D conversion of any kind of NaN generates
+ * a quiet NaN by forcing the most significant frac bit to 1.
+ */
+ return float64_maybe_silence_nan(r);
}
-float64 VFP_HELPER(uito, d)(float32 x, CPUState *env)
+float32 VFP_HELPER(fcvts, d)(float64 x, CPUState *env)
{
- return uint32_to_float64(vfp_stoi(x), &env->vfp.fp_status);
+ float32 r = float64_to_float32(x, &env->vfp.fp_status);
+ /* ARM requires that S<->D conversion of any kind of NaN generates
+ * a quiet NaN by forcing the most significant frac bit to 1.
+ */
+ return float32_maybe_silence_nan(r);
}
-float32 VFP_HELPER(sito, s)(float32 x, CPUState *env)
-{
- return int32_to_float32(vfp_stoi(x), &env->vfp.fp_status);
-}
+/* VFP3 fixed point conversion. */
+#define VFP_CONV_FIX(name, p, fsz, itype, sign) \
+float##fsz HELPER(vfp_##name##to##p)(uint##fsz##_t x, uint32_t shift, \
+ void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ float##fsz tmp; \
+ tmp = sign##int32_to_##float##fsz((itype##_t)x, fpst); \
+ return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
+} \
+uint##fsz##_t HELPER(vfp_to##name##p)(float##fsz x, uint32_t shift, \
+ void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ float##fsz tmp; \
+ if (float##fsz##_is_any_nan(x)) { \
+ float_raise(float_flag_invalid, fpst); \
+ return 0; \
+ } \
+ tmp = float##fsz##_scalbn(x, shift, fpst); \
+ return float##fsz##_to_##itype##_round_to_zero(tmp, fpst); \
+}
+
+VFP_CONV_FIX(sh, d, 64, int16, )
+VFP_CONV_FIX(sl, d, 64, int32, )
+VFP_CONV_FIX(uh, d, 64, uint16, u)
+VFP_CONV_FIX(ul, d, 64, uint32, u)
+VFP_CONV_FIX(sh, s, 32, int16, )
+VFP_CONV_FIX(sl, s, 32, int32, )
+VFP_CONV_FIX(uh, s, 32, uint16, u)
+VFP_CONV_FIX(ul, s, 32, uint32, u)
+#undef VFP_CONV_FIX
-float64 VFP_HELPER(sito, d)(float32 x, CPUState *env)
+/* Half precision conversions. */
+static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
{
- return int32_to_float64(vfp_stoi(x), &env->vfp.fp_status);
+ int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+ float32 r = float16_to_float32(make_float16(a), ieee, s);
+ if (ieee) {
+ return float32_maybe_silence_nan(r);
+ }
+ return r;
}
-/* Float to integer conversion. */
-float32 VFP_HELPER(toui, s)(float32 x, CPUState *env)
+static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
{
- return vfp_itos(float32_to_uint32(x, &env->vfp.fp_status));
+ int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
+ float16 r = float32_to_float16(a, ieee, s);
+ if (ieee) {
+ r = float16_maybe_silence_nan(r);
+ }
+ return float16_val(r);
}
-float32 VFP_HELPER(toui, d)(float64 x, CPUState *env)
+float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
{
- return vfp_itos(float64_to_uint32(x, &env->vfp.fp_status));
+ return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
}
-float32 VFP_HELPER(tosi, s)(float32 x, CPUState *env)
+uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUState *env)
{
- return vfp_itos(float32_to_int32(x, &env->vfp.fp_status));
+ return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
}
-float32 VFP_HELPER(tosi, d)(float64 x, CPUState *env)
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
{
- return vfp_itos(float64_to_int32(x, &env->vfp.fp_status));
+ return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
}
-float32 VFP_HELPER(touiz, s)(float32 x, CPUState *env)
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
{
- return vfp_itos(float32_to_uint32_round_to_zero(x, &env->vfp.fp_status));
+ return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
}
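A note on the ieee flag used in the two conversion routines above: it is the inverse of FPSCR bit 26, the AHP ("alternative half-precision") control. The summary below is an illustration of the assumed softfloat behaviour, not text from the diff:

/* Illustration only (assumes softfloat's handling of the ieee flag):
 *   FPSCR.AHP == 0 -> ieee = 1: IEEE half precision; 0x7c00 converts to
 *       +Inf, and NaNs are propagated (then silenced, as for S<->D).
 *   FPSCR.AHP == 1 -> ieee = 0: no Inf/NaN encodings; exponent-all-ones
 *       patterns are ordinary numbers, e.g. 0x7c00 converts to 65536.0f. */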
-float32 VFP_HELPER(touiz, d)(float64 x, CPUState *env)
-{
- return vfp_itos(float64_to_uint32_round_to_zero(x, &env->vfp.fp_status));
-}
+#define float32_two make_float32(0x40000000)
+#define float32_three make_float32(0x40400000)
+#define float32_one_point_five make_float32(0x3fc00000)
-float32 VFP_HELPER(tosiz, s)(float32 x, CPUState *env)
+float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
{
- return vfp_itos(float32_to_int32_round_to_zero(x, &env->vfp.fp_status));
+ float_status *s = &env->vfp.standard_fp_status;
+ if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
+ (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
+ if (!(float32_is_zero(a) || float32_is_zero(b))) {
+ float_raise(float_flag_input_denormal, s);
+ }
+ return float32_two;
+ }
+ return float32_sub(float32_two, float32_mul(a, b, s), s);
}
-float32 VFP_HELPER(tosiz, d)(float64 x, CPUState *env)
+float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
{
- return vfp_itos(float64_to_int32_round_to_zero(x, &env->vfp.fp_status));
+ float_status *s = &env->vfp.standard_fp_status;
+ float32 product;
+ if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
+ (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
+ if (!(float32_is_zero(a) || float32_is_zero(b))) {
+ float_raise(float_flag_input_denormal, s);
+ }
+ return float32_one_point_five;
+ }
+ product = float32_mul(a, b, s);
+ return float32_div(float32_sub(float32_three, product, s), float32_two, s);
}
-/* floating point conversion */
-float64 VFP_HELPER(fcvtd, s)(float32 x, CPUState *env)
+/* NEON helpers. */
+
+/* Constants 256 and 512 are used in some helpers; we avoid relying on
+ * int->float conversions at run-time. */
+#define float64_256 make_float64(0x4070000000000000LL)
+#define float64_512 make_float64(0x4080000000000000LL)
+
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_estimate(float64 a, CPUState *env)
{
- return float32_to_float64(x, &env->vfp.fp_status);
+ /* These calculations mustn't set any fp exception flags,
+ * so we use a local copy of the fp_status.
+ */
+ float_status dummy_status = env->vfp.standard_fp_status;
+ float_status *s = &dummy_status;
+ /* q = (int)(a * 512.0) */
+ float64 q = float64_mul(float64_512, a, s);
+ int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* r = 1.0 / (((double)q + 0.5) / 512.0) */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_512, s);
+ q = float64_div(float64_one, q, s);
+
+ /* s = (int)(256.0 * r + 0.5) */
+ q = float64_mul(q, float64_256, s);
+ q = float64_add(q, float64_half, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* return (double)s / 256.0 */
+ return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
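For readability, a plain double-precision sketch of the lookup that the comments above describe (illustration only, not part of the diff; the real helper goes through softfloat on a scratch float_status so no guest-visible flags are set):

/* Assumes 0.5 <= a < 1.0, which the caller guarantees. */
static double recip_estimate_sketch(double a)
{
    int q = (int)(a * 512.0);          /* a in units of 1/512, rounded down */
    double r = 1.0 / (((double)q + 0.5) / 512.0);
    int s = (int)(256.0 * r + 0.5);    /* estimate in units of 1/256 */
    return (double)s / 256.0;
}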
-float32 VFP_HELPER(fcvts, d)(float64 x, CPUState *env)
+float32 HELPER(recpe_f32)(float32 a, CPUState *env)
{
- return float64_to_float32(x, &env->vfp.fp_status);
-}
+ float_status *s = &env->vfp.standard_fp_status;
+ float64 f64;
+ uint32_t val32 = float32_val(a);
-/* VFP3 fixed point conversion. */
-#define VFP_CONV_FIX(name, p, ftype, itype, sign) \
-ftype VFP_HELPER(name##to, p)(ftype x, uint32_t shift, CPUState *env) \
-{ \
- ftype tmp; \
- tmp = sign##int32_to_##ftype ((itype)vfp_##p##toi(x), \
- &env->vfp.fp_status); \
- return ftype##_scalbn(tmp, -(int)shift, &env->vfp.fp_status); \
-} \
-ftype VFP_HELPER(to##name, p)(ftype x, uint32_t shift, CPUState *env) \
-{ \
- ftype tmp; \
- tmp = ftype##_scalbn(x, shift, &env->vfp.fp_status); \
- return vfp_ito##p((itype)ftype##_to_##sign##int32_round_to_zero(tmp, \
- &env->vfp.fp_status)); \
-}
-
-VFP_CONV_FIX(sh, d, float64, int16, )
-VFP_CONV_FIX(sl, d, float64, int32, )
-VFP_CONV_FIX(uh, d, float64, uint16, u)
-VFP_CONV_FIX(ul, d, float64, uint32, u)
-VFP_CONV_FIX(sh, s, float32, int16, )
-VFP_CONV_FIX(sl, s, float32, int32, )
-VFP_CONV_FIX(uh, s, float32, uint16, u)
-VFP_CONV_FIX(ul, s, float32, uint32, u)
-#undef VFP_CONV_FIX
+ int result_exp;
+ int a_exp = (val32 & 0x7f800000) >> 23;
+ int sign = val32 & 0x80000000;
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
-{
- float_status *s = &env->vfp.fp_status;
- float32 two = int32_to_float32(2, s);
- return float32_sub(two, float32_mul(a, b, s), s);
+ if (float32_is_any_nan(a)) {
+ if (float32_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, s);
+ }
+ return float32_default_nan;
+ } else if (float32_is_infinity(a)) {
+ return float32_set_sign(float32_zero, float32_is_neg(a));
+ } else if (float32_is_zero_or_denormal(a)) {
+ if (!float32_is_zero(a)) {
+ float_raise(float_flag_input_denormal, s);
+ }
+ float_raise(float_flag_divbyzero, s);
+ return float32_set_sign(float32_infinity, float32_is_neg(a));
+ } else if (a_exp >= 253) {
+ float_raise(float_flag_underflow, s);
+ return float32_set_sign(float32_zero, float32_is_neg(a));
+ }
+
+ f64 = make_float64((0x3feULL << 52)
+ | ((int64_t)(val32 & 0x7fffff) << 29));
+
+ result_exp = 253 - a_exp;
+
+ f64 = recip_estimate(f64, env);
+
+ val32 = sign
+ | ((result_exp & 0xff) << 23)
+ | ((float64_val(f64) >> 29) & 0x7fffff);
+ return make_float32(val32);
}
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_sqrt_estimate(float64 a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 three = int32_to_float32(3, s);
- return float32_sub(three, float32_mul(a, b, s), s);
-}
+ /* These calculations mustn't set any fp exception flags,
+ * so we use a local copy of the fp_status.
+ */
+ float_status dummy_status = env->vfp.standard_fp_status;
+ float_status *s = &dummy_status;
+ float64 q;
+ int64_t q_int;
-/* NEON helpers. */
+ if (float64_lt(a, float64_half, s)) {
+ /* range 0.25 <= a < 0.5 */
-/* TODO: The architecture specifies the value that the estimate functions
- should return. We return the exact reciprocal/root instead. */
-float32 HELPER(recpe_f32)(float32 a, CPUState *env)
-{
- float_status *s = &env->vfp.fp_status;
- float32 one = int32_to_float32(1, s);
- return float32_div(one, a, s);
+ /* a in units of 1/512 rounded down */
+ /* q0 = (int)(a * 512.0); */
+ q = float64_mul(float64_512, a, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* reciprocal root r */
+ /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_512, s);
+ q = float64_sqrt(q, s);
+ q = float64_div(float64_one, q, s);
+ } else {
+ /* range 0.5 <= a < 1.0 */
+
+ /* a in units of 1/256 rounded down */
+ /* q1 = (int)(a * 256.0); */
+ q = float64_mul(float64_256, a, s);
+ int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* reciprocal root r */
+ /* r = 1.0 /sqrt(((double)q1 + 0.5) / 256); */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_256, s);
+ q = float64_sqrt(q, s);
+ q = float64_div(float64_one, q, s);
+ }
+ /* r in units of 1/256 rounded to nearest */
+ /* s = (int)(256.0 * r + 0.5); */
+
+ q = float64_mul(q, float64_256, s);
+ q = float64_add(q, float64_half, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* return (double)s / 256.0;*/
+ return float64_div(int64_to_float64(q_int, s), float64_256, s);
}
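Again purely for illustration, the same algorithm in plain C (using sqrt() from <math.h>); not part of the diff:

/* Assumes 0.25 <= a < 1.0, normalized by the caller with the exponent
 * parity preserved. */
static double recip_sqrt_estimate_sketch(double a)
{
    double r;
    int s;
    if (a < 0.5) {
        int q0 = (int)(a * 512.0);     /* 0.25 <= a < 0.5 */
        r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);
    } else {
        int q1 = (int)(a * 256.0);     /* 0.5 <= a < 1.0 */
        r = 1.0 / sqrt(((double)q1 + 0.5) / 256.0);
    }
    s = (int)(256.0 * r + 0.5);        /* round to nearest 1/256 */
    return (double)s / 256.0;
}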
float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 one = int32_to_float32(1, s);
- return float32_div(one, float32_sqrt(a, s), s);
+ float_status *s = &env->vfp.standard_fp_status;
+ int result_exp;
+ float64 f64;
+ uint32_t val;
+ uint64_t val64;
+
+ val = float32_val(a);
+
+ if (float32_is_any_nan(a)) {
+ if (float32_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, s);
+ }
+ return float32_default_nan;
+ } else if (float32_is_zero_or_denormal(a)) {
+ if (!float32_is_zero(a)) {
+ float_raise(float_flag_input_denormal, s);
+ }
+ float_raise(float_flag_divbyzero, s);
+ return float32_set_sign(float32_infinity, float32_is_neg(a));
+ } else if (float32_is_neg(a)) {
+ float_raise(float_flag_invalid, s);
+ return float32_default_nan;
+ } else if (float32_is_infinity(a)) {
+ return float32_zero;
+ }
+
+ /* Normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+ if ((val & 0x800000) == 0) {
+ f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ | (0x3feULL << 52)
+ | ((uint64_t)(val & 0x7fffff) << 29));
+ } else {
+ f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ | (0x3fdULL << 52)
+ | ((uint64_t)(val & 0x7fffff) << 29));
+ }
+
+ result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+
+ f64 = recip_sqrt_estimate(f64, env);
+
+ val64 = float64_val(f64);
+
+ val = ((val64 >> 63) & 0x80000000)
+ | ((result_exp & 0xff) << 23)
+ | ((val64 >> 29) & 0x7fffff);
+ return make_float32(val);
}
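A short worked example of the bit manipulation above (numbers are illustrative and rounded):

/* a = 4.0f = 0x40800000: exponent field 129 (odd), so bit 23 of the
 * pattern is set and the input is rebuilt with a 0x3fd biased exponent,
 * giving f64 = 0.25.  recip_sqrt_estimate(0.25) ~= 511/256 = 1.996,
 * result_exp = (380 - 129) / 2 = 125, and the reassembled float32 is
 * about 1.996 * 2^-2 ~= 0.499, i.e. an estimate of 1/sqrt(4). */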
uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 tmp;
- tmp = int32_to_float32(a, s);
- tmp = float32_scalbn(tmp, -32, s);
- tmp = helper_recpe_f32(tmp, env);
- tmp = float32_scalbn(tmp, 31, s);
- return float32_to_int32(tmp, s);
+ float64 f64;
+
+ if ((a & 0x80000000) == 0) {
+ return 0xffffffff;
+ }
+
+ f64 = make_float64((0x3feULL << 52)
+ | ((int64_t)(a & 0x7fffffff) << 21));
+
+ f64 = recip_estimate (f64, env);
+
+ return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
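The unsigned estimate treats its input as a 0.32 fixed-point fraction and returns a 1.31 fixed-point reciprocal; a worked example (illustration only):

/* a = 0xC0000000 represents 0.75.  The top bit is set, so we build
 * f64 = 0.75; recip_estimate(0.75) = 341/256 = 1.33203125, and the
 * result is 0x80000000 | 0x2A800000 = 0xAA800000, i.e. 1.33203125 in
 * 1.31 fixed point.  Inputs below 0.5 (top bit clear) return 0xffffffff. */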
uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 tmp;
- tmp = int32_to_float32(a, s);
- tmp = float32_scalbn(tmp, -32, s);
- tmp = helper_rsqrte_f32(tmp, env);
- tmp = float32_scalbn(tmp, 31, s);
- return float32_to_int32(tmp, s);
+ float64 f64;
+
+ if ((a & 0xc0000000) == 0) {
+ return 0xffffffff;
+ }
+
+ if (a & 0x80000000) {
+ f64 = make_float64((0x3feULL << 52)
+ | ((uint64_t)(a & 0x7fffffff) << 21));
+ } else { /* bits 31-30 == '01' */
+ f64 = make_float64((0x3fdULL << 52)
+ | ((uint64_t)(a & 0x3fffffff) << 22));
+ }
+
+ f64 = recip_sqrt_estimate(f64, env);
+
+ return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
void HELPER(set_teecr)(CPUState *env, uint32_t val)
diff --git a/target-arm/helpers.h b/target-arm/helper.h
index bf210fe..850e3d0 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helper.h
@@ -68,10 +68,6 @@ DEF_HELPER_2(get_cp, i32, env, i32)
DEF_HELPER_2(get_r13_banked, i32, env, i32)
DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
-DEF_HELPER_2(mark_exclusive, void, env, i32)
-DEF_HELPER_2(test_exclusive, i32, env, i32)
-DEF_HELPER_1(clrex, void, env)
-
DEF_HELPER_1(get_user_reg, i32, i32)
DEF_HELPER_2(set_user_reg, void, i32, i32)
@@ -100,36 +96,41 @@ DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
-DEF_HELPER_2(vfp_uitos, f32, f32, env)
-DEF_HELPER_2(vfp_uitod, f64, f32, env)
-DEF_HELPER_2(vfp_sitos, f32, f32, env)
-DEF_HELPER_2(vfp_sitod, f64, f32, env)
-
-DEF_HELPER_2(vfp_touis, f32, f32, env)
-DEF_HELPER_2(vfp_touid, f32, f64, env)
-DEF_HELPER_2(vfp_touizs, f32, f32, env)
-DEF_HELPER_2(vfp_touizd, f32, f64, env)
-DEF_HELPER_2(vfp_tosis, f32, f32, env)
-DEF_HELPER_2(vfp_tosid, f32, f64, env)
-DEF_HELPER_2(vfp_tosizs, f32, f32, env)
-DEF_HELPER_2(vfp_tosizd, f32, f64, env)
-
-DEF_HELPER_3(vfp_toshs, f32, f32, i32, env)
-DEF_HELPER_3(vfp_tosls, f32, f32, i32, env)
-DEF_HELPER_3(vfp_touhs, f32, f32, i32, env)
-DEF_HELPER_3(vfp_touls, f32, f32, i32, env)
-DEF_HELPER_3(vfp_toshd, f64, f64, i32, env)
-DEF_HELPER_3(vfp_tosld, f64, f64, i32, env)
-DEF_HELPER_3(vfp_touhd, f64, f64, i32, env)
-DEF_HELPER_3(vfp_tould, f64, f64, i32, env)
-DEF_HELPER_3(vfp_shtos, f32, f32, i32, env)
-DEF_HELPER_3(vfp_sltos, f32, f32, i32, env)
-DEF_HELPER_3(vfp_uhtos, f32, f32, i32, env)
-DEF_HELPER_3(vfp_ultos, f32, f32, i32, env)
-DEF_HELPER_3(vfp_shtod, f64, f64, i32, env)
-DEF_HELPER_3(vfp_sltod, f64, f64, i32, env)
-DEF_HELPER_3(vfp_uhtod, f64, f64, i32, env)
-DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
+DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
+DEF_HELPER_2(vfp_uitod, f64, i32, ptr)
+DEF_HELPER_2(vfp_sitos, f32, i32, ptr)
+DEF_HELPER_2(vfp_sitod, f64, i32, ptr)
+
+DEF_HELPER_2(vfp_touis, i32, f32, ptr)
+DEF_HELPER_2(vfp_touid, i32, f64, ptr)
+DEF_HELPER_2(vfp_touizs, i32, f32, ptr)
+DEF_HELPER_2(vfp_touizd, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosis, i32, f32, ptr)
+DEF_HELPER_2(vfp_tosid, i32, f64, ptr)
+DEF_HELPER_2(vfp_tosizs, i32, f32, ptr)
+DEF_HELPER_2(vfp_tosizd, i32, f64, ptr)
+
+DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr)
+DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr)
+DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
+
+DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
+DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
+DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
+DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
DEF_HELPER_3(recps_f32, f32, f32, f32, env)
DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
@@ -138,10 +139,6 @@ DEF_HELPER_2(rsqrte_f32, f32, f32, env)
DEF_HELPER_2(recpe_u32, i32, i32, env)
DEF_HELPER_2(rsqrte_u32, i32, i32, env)
DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32)
-DEF_HELPER_2(neon_add_saturate_u64, i64, i64, i64)
-DEF_HELPER_2(neon_add_saturate_s64, i64, i64, i64)
-DEF_HELPER_2(neon_sub_saturate_u64, i64, i64, i64)
-DEF_HELPER_2(neon_sub_saturate_s64, i64, i64, i64)
DEF_HELPER_2(add_cc, i32, i32, i32)
DEF_HELPER_2(adc_cc, i32, i32, i32)
@@ -151,21 +148,28 @@ DEF_HELPER_2(sbc_cc, i32, i32, i32)
DEF_HELPER_2(shl, i32, i32, i32)
DEF_HELPER_2(shr, i32, i32, i32)
DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(ror, i32, i32, i32)
DEF_HELPER_2(shl_cc, i32, i32, i32)
DEF_HELPER_2(shr_cc, i32, i32, i32)
DEF_HELPER_2(sar_cc, i32, i32, i32)
DEF_HELPER_2(ror_cc, i32, i32, i32)
/* neon_helper.c */
-DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32)
+DEF_HELPER_2(neon_qadd_u8, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_s8, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_u16, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_u32, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_s32, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_u8, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_s8, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_u16, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_u32, i32, i32, i32)
+DEF_HELPER_2(neon_qsub_s32, i32, i32, i32)
+DEF_HELPER_2(neon_qadd_u64, i64, i64, i64)
+DEF_HELPER_2(neon_qadd_s64, i64, i64, i64)
+DEF_HELPER_2(neon_qsub_u64, i64, i64, i64)
+DEF_HELPER_2(neon_qsub_s64, i64, i64, i64)
DEF_HELPER_2(neon_hadd_s8, i32, i32, i32)
DEF_HELPER_2(neon_hadd_u8, i32, i32, i32)
@@ -243,22 +247,26 @@ DEF_HELPER_2(neon_rshl_u32, i32, i32, i32)
DEF_HELPER_2(neon_rshl_s32, i32, i32, i32)
DEF_HELPER_2(neon_rshl_u64, i64, i64, i64)
DEF_HELPER_2(neon_rshl_s64, i64, i64, i64)
-DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64)
-DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64)
+DEF_HELPER_2(neon_qshl_u8, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_s8, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_u16, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_u32, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_s32, i32, i32, i32)
+DEF_HELPER_2(neon_qshl_u64, i64, i64, i64)
+DEF_HELPER_2(neon_qshl_s64, i64, i64, i64)
+DEF_HELPER_2(neon_qshlu_s8, i32, i32, i32);
+DEF_HELPER_2(neon_qshlu_s16, i32, i32, i32);
+DEF_HELPER_2(neon_qshlu_s32, i32, i32, i32);
+DEF_HELPER_2(neon_qshlu_s64, i64, i64, i64);
+DEF_HELPER_2(neon_qrshl_u8, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_s8, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_u16, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_u32, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_s32, i32, i32, i32)
+DEF_HELPER_2(neon_qrshl_u64, i64, i64, i64)
+DEF_HELPER_2(neon_qrshl_s64, i64, i64, i64)
DEF_HELPER_2(neon_add_u8, i32, i32, i32)
DEF_HELPER_2(neon_add_u16, i32, i32, i32)
@@ -269,6 +277,7 @@ DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
+DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
@@ -286,19 +295,22 @@ DEF_HELPER_1(neon_cls_s16, i32, i32)
DEF_HELPER_1(neon_cls_s32, i32, i32)
DEF_HELPER_1(neon_cnt_u8, i32, i32)
-DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32)
+DEF_HELPER_2(neon_qdmulh_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qrdmulh_s16, i32, i32, i32)
+DEF_HELPER_2(neon_qdmulh_s32, i32, i32, i32)
+DEF_HELPER_2(neon_qrdmulh_s32, i32, i32, i32)
DEF_HELPER_1(neon_narrow_u8, i32, i64)
DEF_HELPER_1(neon_narrow_u16, i32, i64)
-DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64)
-DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64)
+DEF_HELPER_1(neon_unarrow_sat8, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_u8, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_s8, i32, i64)
+DEF_HELPER_1(neon_unarrow_sat16, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_u16, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_s16, i32, i64)
+DEF_HELPER_1(neon_unarrow_sat32, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_u32, i32, i64)
+DEF_HELPER_1(neon_narrow_sat_s32, i32, i64)
DEF_HELPER_1(neon_narrow_high_u8, i32, i64)
DEF_HELPER_1(neon_narrow_high_u16, i32, i64)
DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64)
@@ -314,8 +326,8 @@ DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
-DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
-DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64)
+DEF_HELPER_2(neon_addl_saturate_s32, i64, i64, i64)
+DEF_HELPER_2(neon_addl_saturate_s64, i64, i64, i64)
DEF_HELPER_2(neon_abdl_u16, i64, i32, i32)
DEF_HELPER_2(neon_abdl_s16, i64, i32, i32)
DEF_HELPER_2(neon_abdl_u32, i64, i32, i32)
@@ -331,18 +343,12 @@ DEF_HELPER_1(neon_negl_u16, i64, i64)
DEF_HELPER_1(neon_negl_u32, i64, i64)
DEF_HELPER_1(neon_negl_u64, i64, i64)
-DEF_HELPER_2(neon_qabs_s8, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s16, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s32, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
-
-DEF_HELPER_0(neon_trn_u8, void)
-DEF_HELPER_0(neon_trn_u16, void)
-DEF_HELPER_0(neon_unzip_u8, void)
-DEF_HELPER_0(neon_zip_u8, void)
-DEF_HELPER_0(neon_zip_u16, void)
+DEF_HELPER_1(neon_qabs_s8, i32, i32)
+DEF_HELPER_1(neon_qabs_s16, i32, i32)
+DEF_HELPER_1(neon_qabs_s32, i32, i32)
+DEF_HELPER_1(neon_qneg_s8, i32, i32)
+DEF_HELPER_1(neon_qneg_s16, i32, i32)
+DEF_HELPER_1(neon_qneg_s32, i32, i32)
DEF_HELPER_2(neon_min_f32, i32, i32, i32)
DEF_HELPER_2(neon_max_f32, i32, i32, i32)
@@ -369,47 +375,47 @@ DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64)
DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64)
DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64)
-#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \
-DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \
-DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \
-
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackl)
-DEF_IWMMXT_HELPER_SIZE_ENV(unpackh)
-
-DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64)
-DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu)
-DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(mins)
-DEF_IWMMXT_HELPER_SIZE_ENV(minu)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxs)
-DEF_IWMMXT_HELPER_SIZE_ENV(maxu)
-
-DEF_IWMMXT_HELPER_SIZE_ENV(subn)
-DEF_IWMMXT_HELPER_SIZE_ENV(addn)
-DEF_IWMMXT_HELPER_SIZE_ENV(subu)
-DEF_IWMMXT_HELPER_SIZE_ENV(addu)
-DEF_IWMMXT_HELPER_SIZE_ENV(subs)
-DEF_IWMMXT_HELPER_SIZE_ENV(adds)
-
-DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
+#define DEF_IWMMXT_HELPER_SIZE(name) \
+DEF_HELPER_2(iwmmxt_##name##b, i64, i64, i64) \
+DEF_HELPER_2(iwmmxt_##name##w, i64, i64, i64) \
+DEF_HELPER_2(iwmmxt_##name##l, i64, i64, i64) \
+
+DEF_IWMMXT_HELPER_SIZE(unpackl)
+DEF_IWMMXT_HELPER_SIZE(unpackh)
+
+DEF_HELPER_1(iwmmxt_unpacklub, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackluw, i64, i64)
+DEF_HELPER_1(iwmmxt_unpacklul, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhub, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhuw, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhul, i64, i64)
+DEF_HELPER_1(iwmmxt_unpacklsb, i64, i64)
+DEF_HELPER_1(iwmmxt_unpacklsw, i64, i64)
+DEF_HELPER_1(iwmmxt_unpacklsl, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhsb, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhsw, i64, i64)
+DEF_HELPER_1(iwmmxt_unpackhsl, i64, i64)
+
+DEF_IWMMXT_HELPER_SIZE(cmpeq)
+DEF_IWMMXT_HELPER_SIZE(cmpgtu)
+DEF_IWMMXT_HELPER_SIZE(cmpgts)
+
+DEF_IWMMXT_HELPER_SIZE(mins)
+DEF_IWMMXT_HELPER_SIZE(minu)
+DEF_IWMMXT_HELPER_SIZE(maxs)
+DEF_IWMMXT_HELPER_SIZE(maxu)
+
+DEF_IWMMXT_HELPER_SIZE(subn)
+DEF_IWMMXT_HELPER_SIZE(addn)
+DEF_IWMMXT_HELPER_SIZE(subu)
+DEF_IWMMXT_HELPER_SIZE(addu)
+DEF_IWMMXT_HELPER_SIZE(subs)
+DEF_IWMMXT_HELPER_SIZE(adds)
+
+DEF_HELPER_2(iwmmxt_avgb0, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_avgb1, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_avgw0, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_avgw1, i64, i64, i64)
DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
@@ -428,26 +434,26 @@ DEF_HELPER_1(iwmmxt_msbb, i32, i64)
DEF_HELPER_1(iwmmxt_msbw, i32, i64)
DEF_HELPER_1(iwmmxt_msbl, i32, i64)
-DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32)
-DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32)
-
-DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64)
-DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64)
+DEF_HELPER_2(iwmmxt_srlw, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_srll, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_srlq, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_sllw, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_slll, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_sllq, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_sraw, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_sral, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_sraq, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_rorw, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_rorl, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_rorq, i64, i64, i32)
+DEF_HELPER_2(iwmmxt_shufh, i64, i64, i32)
+
+DEF_HELPER_2(iwmmxt_packuw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_packul, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_packuq, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_packsw, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_packsl, i64, i64, i64)
+DEF_HELPER_2(iwmmxt_packsq, i64, i64, i64)
DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32)
DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32)
@@ -455,4 +461,17 @@ DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32)
DEF_HELPER_2(set_teecr, void, env, i32)
+DEF_HELPER_2(neon_unzip8, void, i32, i32)
+DEF_HELPER_2(neon_unzip16, void, i32, i32)
+DEF_HELPER_2(neon_qunzip8, void, i32, i32)
+DEF_HELPER_2(neon_qunzip16, void, i32, i32)
+DEF_HELPER_2(neon_qunzip32, void, i32, i32)
+DEF_HELPER_2(neon_zip8, void, i32, i32)
+DEF_HELPER_2(neon_zip16, void, i32, i32)
+DEF_HELPER_2(neon_qzip8, void, i32, i32)
+DEF_HELPER_2(neon_qzip16, void, i32, i32)
+DEF_HELPER_2(neon_qzip32, void, i32, i32)
+DEF_HELPER_1(neon_vldst_all, void, i32)
+
#include "helper-android.h"
+#include "def-helper.h"
diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index 2e4193e..ebe6eb9 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -16,16 +16,15 @@
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include "cpu.h"
-#include "exec-all.h"
-#include "helpers.h"
+#include "exec.h"
+#include "helper.h"
/* iwMMXt macros extracted from GNU gdb. */
@@ -163,8 +162,7 @@ uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b)
SIMD64_SET(NBIT64(x), SIMD_NBIT) | \
SIMD64_SET(ZBIT64(x), SIMD_ZBIT)
#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3) \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(uint64_t a, uint64_t b) \
{ \
a = \
(((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) | \
@@ -178,8 +176,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUState *env, \
NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \
return a; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(uint64_t a, uint64_t b) \
{ \
a = \
(((a >> SH0) & 0xffff) << 0) | \
@@ -191,8 +188,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUState *env, \
NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3); \
return a; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(uint64_t a, uint64_t b) \
{ \
a = \
(((a >> SH0) & 0xffffffff) << 0) | \
@@ -201,8 +197,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUState *env, \
NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \
return a; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(uint64_t x) \
{ \
x = \
(((x >> SH0) & 0xff) << 0) | \
@@ -214,8 +209,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUState *env, \
NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \
return x; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(uint64_t x) \
{ \
x = \
(((x >> SH0) & 0xffff) << 0) | \
@@ -224,15 +218,13 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUState *env, \
NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \
return x; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(uint64_t x) \
{ \
x = (((x >> SH0) & 0xffffffff) << 0); \
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \
return x; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(uint64_t x) \
{ \
x = \
((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) | \
@@ -244,8 +236,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUState *env, \
NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \
return x; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(uint64_t x) \
{ \
x = \
((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) | \
@@ -254,8 +245,7 @@ uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUState *env, \
NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \
return x; \
} \
-uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUState *env, \
- uint64_t x) \
+uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(uint64_t x) \
{ \
x = EXTEND32((x >> SH0) & 0xffffffff); \
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \
@@ -265,8 +255,7 @@ IWMMXT_OP_UNPACK(l, 0, 8, 16, 24)
IWMMXT_OP_UNPACK(h, 32, 40, 48, 56)
#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O) \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(uint64_t a, uint64_t b) \
{ \
a = \
CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) | \
@@ -280,8 +269,7 @@ uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUState *env, \
NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \
return a; \
} \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(uint64_t a, uint64_t b) \
{ \
a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) | \
CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff); \
@@ -290,8 +278,7 @@ uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUState *env, \
NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); \
return a; \
} \
-uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUState *env, \
- uint64_t a, uint64_t b) \
+uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(uint64_t a, uint64_t b) \
{ \
a = CMP(0, Tl, O, 0xffffffff) | \
CMP(32, Tl, O, 0xffffffff); \
@@ -330,7 +317,7 @@ IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +)
#define AVGB(SHR) ((( \
((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR)
#define IWMMXT_OP_AVGB(r) \
-uint64_t HELPER(iwmmxt_avgb##r)(CPUState *env, uint64_t a, uint64_t b) \
+uint64_t HELPER(iwmmxt_avgb##r)(uint64_t a, uint64_t b) \
{ \
const int round = r; \
a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) | \
@@ -354,7 +341,7 @@ IWMMXT_OP_AVGB(1)
#define AVGW(SHR) ((( \
((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR)
#define IWMMXT_OP_AVGW(r) \
-uint64_t HELPER(iwmmxt_avgw##r)(CPUState *env, uint64_t a, uint64_t b) \
+uint64_t HELPER(iwmmxt_avgw##r)(uint64_t a, uint64_t b) \
{ \
const int round = r; \
a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48); \
@@ -465,7 +452,7 @@ uint32_t HELPER(iwmmxt_msbl)(uint64_t x)
}
/* FIXME: Split wCASF setting into a separate op to avoid env use. */
-uint64_t HELPER(iwmmxt_srlw)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srlw)(uint64_t x, uint32_t n)
{
x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) |
(((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) |
@@ -477,7 +464,7 @@ uint64_t HELPER(iwmmxt_srlw)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_srll)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srll)(uint64_t x, uint32_t n)
{
x = ((x & (0xffffffffll << 0)) >> n) |
((x >> n) & (0xffffffffll << 32));
@@ -486,14 +473,14 @@ uint64_t HELPER(iwmmxt_srll)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_srlq)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_srlq)(uint64_t x, uint32_t n)
{
x >>= n;
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
return x;
}
-uint64_t HELPER(iwmmxt_sllw)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sllw)(uint64_t x, uint32_t n)
{
x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) |
(((x & (0xffffll << 16)) << n) & (0xffffll << 16)) |
@@ -505,7 +492,7 @@ uint64_t HELPER(iwmmxt_sllw)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_slll)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_slll)(uint64_t x, uint32_t n)
{
x = ((x << n) & (0xffffffffll << 0)) |
((x & (0xffffffffll << 32)) << n);
@@ -514,14 +501,14 @@ uint64_t HELPER(iwmmxt_slll)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_sllq)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sllq)(uint64_t x, uint32_t n)
{
x <<= n;
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
return x;
}
-uint64_t HELPER(iwmmxt_sraw)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sraw)(uint64_t x, uint32_t n)
{
x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) |
((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) |
@@ -533,7 +520,7 @@ uint64_t HELPER(iwmmxt_sraw)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_sral)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sral)(uint64_t x, uint32_t n)
{
x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) |
(((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32);
@@ -542,14 +529,14 @@ uint64_t HELPER(iwmmxt_sral)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_sraq)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_sraq)(uint64_t x, uint32_t n)
{
x = (int64_t) x >> n;
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
return x;
}
-uint64_t HELPER(iwmmxt_rorw)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorw)(uint64_t x, uint32_t n)
{
x = ((((x & (0xffffll << 0)) >> n) |
((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) |
@@ -565,7 +552,7 @@ uint64_t HELPER(iwmmxt_rorw)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_rorl)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorl)(uint64_t x, uint32_t n)
{
x = ((x & (0xffffffffll << 0)) >> n) |
((x >> n) & (0xffffffffll << 32)) |
@@ -576,14 +563,14 @@ uint64_t HELPER(iwmmxt_rorl)(CPUState *env, uint64_t x, uint32_t n)
return x;
}
-uint64_t HELPER(iwmmxt_rorq)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_rorq)(uint64_t x, uint32_t n)
{
x = (x >> n) | (x << (64 - n));
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x);
return x;
}
-uint64_t HELPER(iwmmxt_shufh)(CPUState *env, uint64_t x, uint32_t n)
+uint64_t HELPER(iwmmxt_shufh)(uint64_t x, uint32_t n)
{
x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) |
(((x >> ((n << 2) & 0x30)) & 0xffff) << 16) |
@@ -596,7 +583,7 @@ uint64_t HELPER(iwmmxt_shufh)(CPUState *env, uint64_t x, uint32_t n)
}
/* TODO: Unsigned-Saturation */
-uint64_t HELPER(iwmmxt_packuw)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packuw)(uint64_t a, uint64_t b)
{
a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
(((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
@@ -610,7 +597,7 @@ uint64_t HELPER(iwmmxt_packuw)(CPUState *env, uint64_t a, uint64_t b)
return a;
}
-uint64_t HELPER(iwmmxt_packul)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packul)(uint64_t a, uint64_t b)
{
a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
(((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
@@ -620,7 +607,7 @@ uint64_t HELPER(iwmmxt_packul)(CPUState *env, uint64_t a, uint64_t b)
return a;
}
-uint64_t HELPER(iwmmxt_packuq)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packuq)(uint64_t a, uint64_t b)
{
a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
@@ -629,7 +616,7 @@ uint64_t HELPER(iwmmxt_packuq)(CPUState *env, uint64_t a, uint64_t b)
}
/* TODO: Signed-Saturation */
-uint64_t HELPER(iwmmxt_packsw)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsw)(uint64_t a, uint64_t b)
{
a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) |
(((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) |
@@ -643,7 +630,7 @@ uint64_t HELPER(iwmmxt_packsw)(CPUState *env, uint64_t a, uint64_t b)
return a;
}
-uint64_t HELPER(iwmmxt_packsl)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsl)(uint64_t a, uint64_t b)
{
a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) |
(((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48);
@@ -653,7 +640,7 @@ uint64_t HELPER(iwmmxt_packsl)(CPUState *env, uint64_t a, uint64_t b)
return a;
}
-uint64_t HELPER(iwmmxt_packsq)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(iwmmxt_packsq)(uint64_t a, uint64_t b)
{
a = (a & 0xffffffff) | ((b & 0xffffffff) << 32);
env->iwmmxt.cregs[ARM_IWMMXT_wCASF] =
diff --git a/target-arm/machine.c b/target-arm/machine.c
index b1deacb..1726fd5 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -22,12 +22,18 @@ void cpu_save(QEMUFile *f, void *opaque)
}
qemu_put_be32(f, env->cp15.c0_cpuid);
qemu_put_be32(f, env->cp15.c0_cachetype);
+ qemu_put_be32(f, env->cp15.c0_cssel);
qemu_put_be32(f, env->cp15.c1_sys);
qemu_put_be32(f, env->cp15.c1_coproc);
qemu_put_be32(f, env->cp15.c1_xscaleauxcr);
+ qemu_put_be32(f, env->cp15.c1_secfg);
+ qemu_put_be32(f, env->cp15.c1_sedbg);
+ qemu_put_be32(f, env->cp15.c1_nseac);
qemu_put_be32(f, env->cp15.c2_base0);
qemu_put_be32(f, env->cp15.c2_base1);
+ qemu_put_be32(f, env->cp15.c2_control);
qemu_put_be32(f, env->cp15.c2_mask);
+ qemu_put_be32(f, env->cp15.c2_base_mask);
qemu_put_be32(f, env->cp15.c2_data);
qemu_put_be32(f, env->cp15.c2_insn);
qemu_put_be32(f, env->cp15.c3);
@@ -38,8 +44,12 @@ void cpu_save(QEMUFile *f, void *opaque)
}
qemu_put_be32(f, env->cp15.c6_insn);
qemu_put_be32(f, env->cp15.c6_data);
+ qemu_put_be32(f, env->cp15.c7_par);
qemu_put_be32(f, env->cp15.c9_insn);
qemu_put_be32(f, env->cp15.c9_data);
+ qemu_put_be32(f, env->cp15.c9_pmcr_data);
+ qemu_put_be32(f, env->cp15.c9_useren);
+ qemu_put_be32(f, env->cp15.c9_inten);
qemu_put_be32(f, env->cp15.c13_fcse);
qemu_put_be32(f, env->cp15.c13_context);
qemu_put_be32(f, env->cp15.c13_tls1);
@@ -47,6 +57,8 @@ void cpu_save(QEMUFile *f, void *opaque)
qemu_put_be32(f, env->cp15.c13_tls3);
qemu_put_be32(f, env->cp15.c15_cpar);
+ qemu_put_be32(f, env->cp14_dbgdidr);
+
qemu_put_be32(f, env->features);
if (arm_feature(env, ARM_FEATURE_VFP)) {
@@ -91,12 +103,18 @@ void cpu_save(QEMUFile *f, void *opaque)
qemu_put_be32(f, env->v7m.current_sp);
qemu_put_be32(f, env->v7m.exception);
}
+
+ if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+ qemu_put_be32(f, env->teecr);
+ qemu_put_be32(f, env->teehbr);
+ }
}
int cpu_load(QEMUFile *f, void *opaque, int version_id)
{
CPUARMState *env = (CPUARMState *)opaque;
int i;
+ uint32_t val;
if (version_id != CPU_SAVE_VERSION)
return -EINVAL;
@@ -104,7 +122,10 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
for (i = 0; i < 16; i++) {
env->regs[i] = qemu_get_be32(f);
}
- cpsr_write(env, qemu_get_be32(f), 0xffffffff);
+ val = qemu_get_be32(f);
+ /* Avoid mode switch when restoring CPSR. */
+ env->uncached_cpsr = val & CPSR_M;
+ cpsr_write(env, val, 0xffffffff);
env->spsr = qemu_get_be32(f);
for (i = 0; i < 6; i++) {
env->banked_spsr[i] = qemu_get_be32(f);
@@ -117,12 +138,18 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
}
env->cp15.c0_cpuid = qemu_get_be32(f);
env->cp15.c0_cachetype = qemu_get_be32(f);
+ env->cp15.c0_cssel = qemu_get_be32(f);
env->cp15.c1_sys = qemu_get_be32(f);
env->cp15.c1_coproc = qemu_get_be32(f);
env->cp15.c1_xscaleauxcr = qemu_get_be32(f);
+ env->cp15.c1_secfg = qemu_get_be32(f);
+ env->cp15.c1_sedbg = qemu_get_be32(f);
+ env->cp15.c1_nseac = qemu_get_be32(f);
env->cp15.c2_base0 = qemu_get_be32(f);
env->cp15.c2_base1 = qemu_get_be32(f);
+ env->cp15.c2_control = qemu_get_be32(f);
env->cp15.c2_mask = qemu_get_be32(f);
+ env->cp15.c2_base_mask = qemu_get_be32(f);
env->cp15.c2_data = qemu_get_be32(f);
env->cp15.c2_insn = qemu_get_be32(f);
env->cp15.c3 = qemu_get_be32(f);
@@ -133,8 +160,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
}
env->cp15.c6_insn = qemu_get_be32(f);
env->cp15.c6_data = qemu_get_be32(f);
+ env->cp15.c7_par = qemu_get_be32(f);
env->cp15.c9_insn = qemu_get_be32(f);
env->cp15.c9_data = qemu_get_be32(f);
+ env->cp15.c9_pmcr_data = qemu_get_be32(f);
+ env->cp15.c9_useren = qemu_get_be32(f);
+ env->cp15.c9_inten = qemu_get_be32(f);
env->cp15.c13_fcse = qemu_get_be32(f);
env->cp15.c13_context = qemu_get_be32(f);
env->cp15.c13_tls1 = qemu_get_be32(f);
@@ -142,6 +173,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
env->cp15.c13_tls3 = qemu_get_be32(f);
env->cp15.c15_cpar = qemu_get_be32(f);
+ env->cp14_dbgdidr = qemu_get_be32(f);
+
env->features = qemu_get_be32(f);
if (arm_feature(env, ARM_FEATURE_VFP)) {
@@ -187,5 +220,10 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
env->v7m.exception = qemu_get_be32(f);
}
+ if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+ env->teecr = qemu_get_be32(f);
+ env->teehbr = qemu_get_be32(f);
+ }
+
return 0;
}
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index f32ecd6..9165519 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -10,39 +10,15 @@
#include <stdio.h>
#include "cpu.h"
-#include "exec-all.h"
-#include "helpers.h"
+#include "exec.h"
+#include "helper.h"
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q
-static float_status neon_float_status;
-#define NFS &neon_float_status
-
-/* Helper routines to perform bitwise copies between float and int. */
-static inline float32 vfp_itos(uint32_t i)
-{
- union {
- uint32_t i;
- float32 s;
- } v;
-
- v.i = i;
- return v.s;
-}
-
-static inline uint32_t vfp_stoi(float32 s)
-{
- union {
- uint32_t i;
- float32 s;
- } v;
-
- v.s = s;
- return v.i;
-}
+#define NFS (&env->vfp.standard_fp_status)
#define NEON_TYPE1(name, type) \
typedef struct \
@@ -139,10 +115,6 @@ NEON_TYPE1(u32, uint32_t)
uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \
NEON_VOP_BODY(vtype, n)
-#define NEON_VOP_ENV(name, vtype, n) \
-uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \
-NEON_VOP_BODY(vtype, n)
-
/* Pairwise operations. */
/* For 32-bit elements each segment only contains a single element, so
the elementwise and pairwise operations are the same. */
@@ -191,13 +163,35 @@ uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
dest = tmp; \
}} while(0)
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
-NEON_VOP_ENV(qadd_u8, neon_u8, 4)
+NEON_VOP(qadd_u8, neon_u8, 4)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
-NEON_VOP_ENV(qadd_u16, neon_u16, 2)
+NEON_VOP(qadd_u16, neon_u16, 2)
#undef NEON_FN
#undef NEON_USAT
+uint32_t HELPER(neon_qadd_u32)(uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (res < a) {
+ SET_QC();
+ res = ~0;
+ }
+ return res;
+}
+
+uint64_t HELPER(neon_qadd_u64)(uint64_t src1, uint64_t src2)
+{
+ uint64_t res;
+
+ res = src1 + src2;
+ if (res < src1) {
+ SET_QC();
+ res = ~(uint64_t)0;
+ }
+ return res;
+}
+
#define NEON_SSAT(dest, src1, src2, type) do { \
int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
if (tmp != (type)tmp) { \
@@ -211,13 +205,35 @@ NEON_VOP_ENV(qadd_u16, neon_u16, 2)
dest = tmp; \
} while(0)
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
-NEON_VOP_ENV(qadd_s8, neon_s8, 4)
+NEON_VOP(qadd_s8, neon_s8, 4)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
-NEON_VOP_ENV(qadd_s16, neon_s16, 2)
+NEON_VOP(qadd_s16, neon_s16, 2)
#undef NEON_FN
#undef NEON_SSAT
+uint32_t HELPER(neon_qadd_s32)(uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
+ SET_QC();
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT);
+ }
+ return res;
+}
+
+uint64_t HELPER(neon_qadd_s64)(uint64_t src1, uint64_t src2)
+{
+ uint64_t res;
+
+ res = src1 + src2;
+ if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
+ SET_QC();
+ res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
+ }
+ return res;
+}
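The signed overflow test and saturation value above are compact enough to deserve a worked example (illustration only, not part of the diff):

/* Overflow occurred iff the operands have the same sign but the result's
 * sign differs: ((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT).
 * The saturated value then depends only on the sign of the operands:
 *   a = b = 0x7fffffff: res = 0xfffffffe, overflow;
 *     ~(((int32_t)a >> 31) ^ SIGNBIT) = ~0x80000000 = 0x7fffffff (INT32_MAX).
 *   a = b = 0x80000000: res = 0, overflow;
 *     ~(((int32_t)a >> 31) ^ SIGNBIT) = ~0x7fffffff = 0x80000000 (INT32_MIN). */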
+
#define NEON_USAT(dest, src1, src2, type) do { \
uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
if (tmp != (type)tmp) { \
@@ -227,13 +243,36 @@ NEON_VOP_ENV(qadd_s16, neon_s16, 2)
dest = tmp; \
}} while(0)
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
-NEON_VOP_ENV(qsub_u8, neon_u8, 4)
+NEON_VOP(qsub_u8, neon_u8, 4)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
-NEON_VOP_ENV(qsub_u16, neon_u16, 2)
+NEON_VOP(qsub_u16, neon_u16, 2)
#undef NEON_FN
#undef NEON_USAT
+uint32_t HELPER(neon_qsub_u32)(uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (res > a) {
+ SET_QC();
+ res = 0;
+ }
+ return res;
+}
+
+uint64_t HELPER(neon_qsub_u64)(uint64_t src1, uint64_t src2)
+{
+ uint64_t res;
+
+ if (src1 < src2) {
+ SET_QC();
+ res = 0;
+ } else {
+ res = src1 - src2;
+ }
+ return res;
+}
+
#define NEON_SSAT(dest, src1, src2, type) do { \
int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
if (tmp != (type)tmp) { \
@@ -247,13 +286,35 @@ NEON_VOP_ENV(qsub_u16, neon_u16, 2)
dest = tmp; \
} while(0)
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
-NEON_VOP_ENV(qsub_s8, neon_s8, 4)
+NEON_VOP(qsub_s8, neon_s8, 4)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
-NEON_VOP_ENV(qsub_s16, neon_s16, 2)
+NEON_VOP(qsub_s16, neon_s16, 2)
#undef NEON_FN
#undef NEON_SSAT
+uint32_t HELPER(neon_qsub_s32)(uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
+ SET_QC();
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT);
+ }
+ return res;
+}
+
+uint64_t HELPER(neon_qsub_s64)(uint64_t src1, uint64_t src2)
+{
+ uint64_t res;
+
+ res = src1 - src2;
+ if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
+ SET_QC();
+ res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
+ }
+ return res;
+}
+
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
NEON_VOP(hadd_s8, neon_s8, 4)
NEON_VOP(hadd_u8, neon_u8, 4)
@@ -392,7 +453,8 @@ NEON_VOP(abd_u32, neon_u32, 1)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8 || \
+ tmp <= -(ssize_t)sizeof(src1) * 8) { \
dest = 0; \
} else if (tmp < 0) { \
dest = src1 >> -tmp; \
@@ -420,9 +482,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
dest = 0; \
- } else if (tmp <= -sizeof(src1) * 8) { \
+ } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
dest = src1 >> (sizeof(src1) * 8 - 1); \
} else if (tmp < 0) { \
dest = src1 >> -tmp; \
@@ -453,14 +515,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8) { \
+ if ((tmp >= (ssize_t)sizeof(src1) * 8) \
+ || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \
dest = 0; \
- } else if (tmp < -sizeof(src1) * 8) { \
- dest = src1 >> (sizeof(src1) * 8 - 1); \
- } else if (tmp == -sizeof(src1) * 8) { \
- dest = src1 >> (tmp - 1); \
- dest++; \
- dest >>= 1; \
} else if (tmp < 0) { \
dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
} else { \
@@ -468,23 +525,45 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
}} while (0)
NEON_VOP(rshl_s8, neon_s8, 4)
NEON_VOP(rshl_s16, neon_s16, 2)
-NEON_VOP(rshl_s32, neon_s32, 1)
#undef NEON_FN
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator. */
+uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+ int32_t dest;
+ int32_t val = (int32_t)valop;
+ int8_t shift = (int8_t)shiftop;
+ if ((shift >= 32) || (shift <= -32)) {
+ dest = 0;
+ } else if (shift < 0) {
+ int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+ dest = big_dest >> -shift;
+ } else {
+ dest = val << shift;
+ }
+ return dest;
+}
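A concrete case of the overflow that the comment above guards against (illustration only):

/* val = 0x7fffffff (INT32_MAX), shift = -1: the rounding constant is
 * 1 << 0 = 1, and in 32-bit arithmetic val + 1 would wrap to INT32_MIN,
 * producing a negative result.  With the 64-bit intermediate the sum is
 * 0x80000000 and the correct rounded result 0x40000000 is returned. */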
+
+/* Handling addition overflow with 64-bit input values is trickier
+ * than with 32-bit values. */
uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
{
int8_t shift = (int8_t)shiftop;
int64_t val = valop;
- if (shift >= 64) {
+ if ((shift >= 64) || (shift <= -64)) {
val = 0;
- } else if (shift < -64) {
- val >>= 63;
- } else if (shift == -63) {
- val >>= 63;
- val++;
- val >>= 1;
} else if (shift < 0) {
- val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+ val >>= (-shift - 1);
+ if (val == INT64_MAX) {
+ /* In this case, it means that the rounding constant is 1,
+ * and the addition would overflow. Return the actual
+ * result directly. */
+ val = 0x4000000000000000LL;
+ } else {
+ val++;
+ val >>= 1;
+ }
} else {
val <<= shift;
}
@@ -494,10 +573,11 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8 || \
+ tmp < -(ssize_t)sizeof(src1) * 8) { \
dest = 0; \
- } else if (tmp == -sizeof(src1) * 8) { \
- dest = src1 >> (tmp - 1); \
+ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
+ dest = src1 >> (-tmp - 1); \
} else if (tmp < 0) { \
dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
} else { \
@@ -505,20 +585,48 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
}} while (0)
NEON_VOP(rshl_u8, neon_u8, 4)
NEON_VOP(rshl_u16, neon_u16, 2)
-NEON_VOP(rshl_u32, neon_u32, 1)
#undef NEON_FN
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator. */
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+{
+ uint32_t dest;
+ int8_t shift = (int8_t)shiftop;
+ if (shift >= 32 || shift < -32) {
+ dest = 0;
+ } else if (shift == -32) {
+ dest = val >> 31;
+ } else if (shift < 0) {
+ uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+ dest = big_dest >> -shift;
+ } else {
+ dest = val << shift;
+ }
+ return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is trickier
+ * than with 32-bit values. */
uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
{
int8_t shift = (uint8_t)shiftop;
- if (shift >= 64 || shift < 64) {
+ if (shift >= 64 || shift < -64) {
val = 0;
} else if (shift == -64) {
/* Rounding a 1-bit result just preserves that bit. */
val >>= 63;
- } if (shift < 0) {
- val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
- val >>= -shift;
+ } else if (shift < 0) {
+ val >>= (-shift - 1);
+ if (val == UINT64_MAX) {
+ /* In this case, it means that the rounding constant is 1,
+ * and the addition would overflow. Return the actual
+ * result directly. */
+ val = 0x8000000000000000ULL;
+ } else {
+ val++;
+ val >>= 1;
+ }
} else {
val <<= shift;
}
@@ -528,14 +636,14 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
if (src1) { \
SET_QC(); \
dest = ~0; \
} else { \
dest = 0; \
} \
- } else if (tmp <= -sizeof(src1) * 8) { \
+ } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
dest = 0; \
} else if (tmp < 0) { \
dest = src1 >> -tmp; \
@@ -546,20 +654,18 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
dest = ~0; \
} \
}} while (0)
-NEON_VOP_ENV(qshl_u8, neon_u8, 4)
-NEON_VOP_ENV(qshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qshl_u32, neon_u32, 1)
+NEON_VOP(qshl_u8, neon_u8, 4)
+NEON_VOP(qshl_u16, neon_u16, 2)
+NEON_VOP(qshl_u32, neon_u32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
+uint64_t HELPER(neon_qshl_u64)(uint64_t val, uint64_t shiftop)
{
int8_t shift = (int8_t)shiftop;
if (shift >= 64) {
if (val) {
val = ~(uint64_t)0;
SET_QC();
- } else {
- val = 0;
}
} else if (shift <= -64) {
val = 0;
@@ -579,11 +685,17 @@ uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp >= sizeof(src1) * 8) { \
- if (src1) \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
+ if (src1) { \
SET_QC(); \
- dest = src1 >> 31; \
- } else if (tmp <= -sizeof(src1) * 8) { \
+ dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
+ if (src1 > 0) { \
+ dest--; \
+ } \
+ } else { \
+ dest = src1; \
+ } \
+ } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
dest = src1 >> 31; \
} else if (tmp < 0) { \
dest = src1 >> -tmp; \
@@ -591,24 +703,27 @@ uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
dest = src1 << tmp; \
if ((dest >> tmp) != src1) { \
SET_QC(); \
- dest = src2 >> 31; \
+ dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
+ if (src1 > 0) { \
+ dest--; \
+ } \
} \
}} while (0)
-NEON_VOP_ENV(qshl_s8, neon_s8, 4)
-NEON_VOP_ENV(qshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qshl_s32, neon_s32, 1)
+NEON_VOP(qshl_s8, neon_s8, 4)
+NEON_VOP(qshl_s16, neon_s16, 2)
+NEON_VOP(qshl_s32, neon_s32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
+uint64_t HELPER(neon_qshl_s64)(uint64_t valop, uint64_t shiftop)
{
int8_t shift = (uint8_t)shiftop;
int64_t val = valop;
if (shift >= 64) {
if (val) {
SET_QC();
- val = (val >> 63) & ~SIGNBIT64;
+ val = (val >> 63) ^ ~SIGNBIT64;
}
- } else if (shift <= 64) {
+ } else if (shift <= -64) {
val >>= 63;
} else if (shift < 0) {
val >>= -shift;
@@ -623,12 +738,70 @@ uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
return val;
}
+#define NEON_FN(dest, src1, src2) do { \
+ if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \
+ SET_QC(); \
+ dest = 0; \
+ } else { \
+ int8_t tmp; \
+ tmp = (int8_t)src2; \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
+ if (src1) { \
+ SET_QC(); \
+ dest = ~0; \
+ } else { \
+ dest = 0; \
+ } \
+ } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
+ dest = 0; \
+ } else if (tmp < 0) { \
+ dest = src1 >> -tmp; \
+ } else { \
+ dest = src1 << tmp; \
+ if ((dest >> tmp) != src1) { \
+ SET_QC(); \
+ dest = ~0; \
+ } \
+ } \
+ }} while (0)
+NEON_VOP(qshlu_s8, neon_u8, 4)
+NEON_VOP(qshlu_s16, neon_u16, 2)
+#undef NEON_FN
+
+uint32_t HELPER(neon_qshlu_s32)(uint32_t valop, uint32_t shiftop)
+{
+ if ((int32_t)valop < 0) {
+ SET_QC();
+ return 0;
+ }
+ return helper_neon_qshl_u32(valop, shiftop);
+}
+
+uint64_t HELPER(neon_qshlu_s64)(uint64_t valop, uint64_t shiftop)
+{
+ if ((int64_t)valop < 0) {
+ SET_QC();
+ return 0;
+ }
+ return helper_neon_qshl_u64(valop, shiftop);
+}
/* FIXME: This is wrong. */
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp < 0) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
+ if (src1) { \
+ SET_QC(); \
+ dest = ~0; \
+ } else { \
+ dest = 0; \
+ } \
+ } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
+ dest = 0; \
+ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
+ dest = src1 >> (sizeof(src1) * 8 - 1); \
+ } else if (tmp < 0) { \
dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
} else { \
dest = src1 << tmp; \
@@ -637,16 +810,65 @@ uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
dest = ~0; \
} \
}} while (0)
-NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
-NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
+NEON_VOP(qrshl_u8, neon_u8, 4)
+NEON_VOP(qrshl_u16, neon_u16, 2)
#undef NEON_FN
-uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator. */
+uint32_t HELPER(neon_qrshl_u32)(uint32_t val, uint32_t shiftop)
{
+ uint32_t dest;
int8_t shift = (int8_t)shiftop;
- if (shift < 0) {
- val = (val + (1 << (-1 - shift))) >> -shift;
+ if (shift >= 32) {
+ if (val) {
+ SET_QC();
+ dest = ~0;
+ } else {
+ dest = 0;
+ }
+ } else if (shift < -32) {
+ dest = 0;
+ } else if (shift == -32) {
+ dest = val >> 31;
+ } else if (shift < 0) {
+ uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+ dest = big_dest >> -shift;
+ } else {
+ dest = val << shift;
+ if ((dest >> shift) != val) {
+ SET_QC();
+ dest = ~0;
+ }
+ }
+ return dest;
+}
+
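As an aside, the rounding-shift pattern used in neon_qrshl_u32 above can be sketched in isolation. The helper name below is hypothetical and only illustrates why widening to 64 bits keeps the rounding constant from overflowing; it is not part of the patch.

#include <stdint.h>

/* Illustrative only: rounded unsigned right shift of a 32-bit value.
 * Widening to 64 bits means adding the rounding constant can never wrap. */
static uint32_t rshr_round_u32(uint32_t val, int shift)   /* 1 <= shift <= 31 */
{
    uint64_t acc = (uint64_t)val + (1u << (shift - 1));   /* rounding constant */
    return (uint32_t)(acc >> shift);
}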
+/* Handling addition overflow with 64-bit input values is more
+ * tricky than with 32-bit values. */
+uint64_t HELPER(neon_qrshl_u64)(uint64_t val, uint64_t shiftop)
+{
+ int8_t shift = (int8_t)shiftop;
+ if (shift >= 64) {
+ if (val) {
+ SET_QC();
+ val = ~0;
+ }
+ } else if (shift < -64) {
+ val = 0;
+ } else if (shift == -64) {
+ val >>= 63;
+ } else if (shift < 0) {
+ val >>= (-shift - 1);
+ if (val == UINT64_MAX) {
+ /* In this case, it means that the rounding constant is 1,
+ * and the addition would overflow. Return the actual
+ * result directly. */
+ val = 0x8000000000000000ULL;
+ } else {
+ val++;
+ val >>= 1;
+ }
} else {
uint64_t tmp = val;
val <<= shift;
@@ -661,33 +883,94 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
- if (tmp < 0) { \
+ if (tmp >= (ssize_t)sizeof(src1) * 8) { \
+ if (src1) { \
+ SET_QC(); \
+ dest = (1 << (sizeof(src1) * 8 - 1)); \
+ if (src1 > 0) { \
+ dest--; \
+ } \
+ } else { \
+ dest = 0; \
+ } \
+ } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
+ dest = 0; \
+ } else if (tmp < 0) { \
dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
} else { \
dest = src1 << tmp; \
if ((dest >> tmp) != src1) { \
SET_QC(); \
- dest = src1 >> 31; \
+ dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
+ if (src1 > 0) { \
+ dest--; \
+ } \
} \
}} while (0)
-NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
-NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
+NEON_VOP(qrshl_s8, neon_s8, 4)
+NEON_VOP(qrshl_s16, neon_s16, 2)
#undef NEON_FN
-uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator. */
+uint32_t HELPER(neon_qrshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+ int32_t dest;
+ int32_t val = (int32_t)valop;
+ int8_t shift = (int8_t)shiftop;
+ if (shift >= 32) {
+ if (val) {
+ SET_QC();
+ dest = (val >> 31) ^ ~SIGNBIT;
+ } else {
+ dest = 0;
+ }
+ } else if (shift <= -32) {
+ dest = 0;
+ } else if (shift < 0) {
+ int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+ dest = big_dest >> -shift;
+ } else {
+ dest = val << shift;
+ if ((dest >> shift) != val) {
+ SET_QC();
+ dest = (val >> 31) ^ ~SIGNBIT;
+ }
+ }
+ return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is more
+ * tricky than with 32-bit values. */
+uint64_t HELPER(neon_qrshl_s64)(uint64_t valop, uint64_t shiftop)
{
int8_t shift = (uint8_t)shiftop;
int64_t val = valop;
- if (shift < 0) {
- val = (val + (1 << (-1 - shift))) >> -shift;
+ if (shift >= 64) {
+ if (val) {
+ SET_QC();
+ val = (val >> 63) ^ ~SIGNBIT64;
+ }
+ } else if (shift <= -64) {
+ val = 0;
+ } else if (shift < 0) {
+ val >>= (-shift - 1);
+ if (val == INT64_MAX) {
+ /* In this case, it means that the rounding constant is 1,
+ * and the addition would overflow. Return the actual
+ * result directly. */
+ val = 0x4000000000000000ULL;
+ } else {
+ val++;
+ val >>= 1;
+ }
} else {
- int64_t tmp = val;;
+ int64_t tmp = val;
val <<= shift;
if ((val >> shift) != tmp) {
SET_QC();
- val = tmp >> 31;
+ val = (tmp >> 63) ^ ~SIGNBIT64;
}
}
return val;
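The 64-bit variants above cannot widen any further, so they shift by one bit less and then add the rounding bit separately. A minimal standalone sketch of that trick for the unsigned case, with a hypothetical name and no Q-flag handling:

#include <stdint.h>

/* Illustrative only: (val + 2^(shift-1)) >> shift without a wider type.
 * Shift by shift-1 first; the only value whose +1 would wrap is UINT64_MAX,
 * and its true rounded result is 2^63. */
static uint64_t rshr_round_u64(uint64_t val, int shift)   /* 1 <= shift <= 63 */
{
    val >>= shift - 1;
    if (val == UINT64_MAX) {
        return (uint64_t)1 << 63;
    }
    return (val + 1) >> 1;
}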
@@ -750,6 +1033,36 @@ uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
return result;
}
+uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
+{
+ uint64_t result = 0;
+ uint64_t mask;
+ uint64_t op2ex = op2;
+ op2ex = (op2ex & 0xff) |
+ ((op2ex & 0xff00) << 8) |
+ ((op2ex & 0xff0000) << 16) |
+ ((op2ex & 0xff000000) << 24);
+ while (op1) {
+ mask = 0;
+ if (op1 & 1) {
+ mask |= 0xffff;
+ }
+ if (op1 & (1 << 8)) {
+ mask |= (0xffffU << 16);
+ }
+ if (op1 & (1 << 16)) {
+ mask |= (0xffffULL << 32);
+ }
+ if (op1 & (1 << 24)) {
+ mask |= (0xffffULL << 48);
+ }
+ result ^= op2ex & mask;
+ op1 = (op1 >> 1) & 0x7f7f7f7f;
+ op2ex <<= 1;
+ }
+ return result;
+}
+
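Each 8-bit lane of neon_mull_p8 above performs a carry-less (polynomial, GF(2)) multiplication. A single-lane sketch with a hypothetical name, for illustration only:

#include <stdint.h>

/* Illustrative only: multiply two one-byte polynomials over GF(2),
 * XOR-accumulating shifted copies of b for every set bit of a. */
static uint16_t polymul_u8(uint8_t a, uint8_t b)
{
    uint16_t result = 0;
    int i;
    for (i = 0; i < 8; i++) {
        if (a & (1u << i)) {
            result ^= (uint16_t)b << i;   /* addition without carries */
        }
    }
    return result;
}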
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
NEON_VOP(tst_u8, neon_u8, 4)
NEON_VOP(tst_u16, neon_u16, 2)
@@ -824,8 +1137,9 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x)
if ((tmp ^ (tmp << 1)) & SIGNBIT) { \
SET_QC(); \
tmp = (tmp >> 31) ^ ~SIGNBIT; \
+ } else { \
+ tmp <<= 1; \
} \
- tmp <<= 1; \
if (round) { \
int32_t old = tmp; \
tmp += 1 << 15; \
@@ -837,10 +1151,10 @@ uint32_t HELPER(neon_cnt_u8)(uint32_t x)
dest = tmp >> 16; \
} while(0)
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0)
-NEON_VOP_ENV(qdmulh_s16, neon_s16, 2)
+NEON_VOP(qdmulh_s16, neon_s16, 2)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1)
-NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
+NEON_VOP(qrdmulh_s16, neon_s16, 2)
#undef NEON_FN
#undef NEON_QDMULH16
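The fix above keeps the extra doubling out of the saturated path. A standalone sketch of the non-rounding 16-bit saturating doubling multiply-high, with a hypothetical helper name and the Q flag reduced to a plain int:

#include <stdint.h>

/* Illustrative only: (2 * a * b) >> 16 with saturation.  The only product
 * whose doubling overflows 32 bits is (-32768) * (-32768), which must
 * saturate to INT16_MAX instead of being shifted. */
static int16_t qdmulh16(int16_t a, int16_t b, int *qflag)
{
    int32_t p = (int32_t)a * b;
    if (p == 0x40000000) {
        *qflag = 1;
        return INT16_MAX;
    }
    return (int16_t)((2 * p) >> 16);
}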
@@ -863,10 +1177,10 @@ NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2)
dest = tmp >> 32; \
} while(0)
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0)
-NEON_VOP_ENV(qdmulh_s32, neon_s32, 1)
+NEON_VOP(qdmulh_s32, neon_s32, 1)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1)
-NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1)
+NEON_VOP(qrdmulh_s32, neon_s32, 1)
#undef NEON_FN
#undef NEON_QDMULH32
@@ -907,7 +1221,34 @@ uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x)
return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000);
}
-uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_unarrow_sat8)(uint64_t x)
+{
+ uint16_t s;
+ uint8_t d;
+ uint32_t res = 0;
+#define SAT8(n) \
+ s = x >> n; \
+ if (s & 0x8000) { \
+ SET_QC(); \
+ } else { \
+ if (s > 0xff) { \
+ d = 0xff; \
+ SET_QC(); \
+ } else { \
+ d = s; \
+ } \
+ res |= (uint32_t)d << (n / 2); \
+ }
+
+ SAT8(0);
+ SAT8(16);
+ SAT8(32);
+ SAT8(48);
+#undef SAT8
+ return res;
+}
+
+uint32_t HELPER(neon_narrow_sat_u8)(uint64_t x)
{
uint16_t s;
uint8_t d;
@@ -930,7 +1271,7 @@ uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x)
return res;
}
-uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s8)(uint64_t x)
{
int16_t s;
uint8_t d;
@@ -953,7 +1294,30 @@ uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x)
return res;
}
-uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_unarrow_sat16)(uint64_t x)
+{
+ uint32_t high;
+ uint32_t low;
+ low = x;
+ if (low & 0x80000000) {
+ low = 0;
+ SET_QC();
+ } else if (low > 0xffff) {
+ low = 0xffff;
+ SET_QC();
+ }
+ high = x >> 32;
+ if (high & 0x80000000) {
+ high = 0;
+ SET_QC();
+ } else if (high > 0xffff) {
+ high = 0xffff;
+ SET_QC();
+ }
+ return low | (high << 16);
+}
+
+uint32_t HELPER(neon_narrow_sat_u16)(uint64_t x)
{
uint32_t high;
uint32_t low;
@@ -970,7 +1334,7 @@ uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x)
return low | (high << 16);
}
-uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s16)(uint64_t x)
{
int32_t low;
int32_t high;
@@ -987,7 +1351,20 @@ uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x)
return (uint16_t)low | (high << 16);
}
-uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_unarrow_sat32)(uint64_t x)
+{
+ if (x & 0x8000000000000000ull) {
+ SET_QC();
+ return 0;
+ }
+ if (x > 0xffffffffu) {
+ SET_QC();
+ return 0xffffffffu;
+ }
+ return x;
+}
+
+uint32_t HELPER(neon_narrow_sat_u32)(uint64_t x)
{
if (x > 0xffffffffu) {
SET_QC();
@@ -996,11 +1373,11 @@ uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x)
return x;
}
-uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x)
+uint32_t HELPER(neon_narrow_sat_s32)(uint64_t x)
{
if ((int64_t)x != (int32_t)x) {
SET_QC();
- return (x >> 63) ^ 0x7fffffff;
+ return ((int64_t)x >> 63) ^ 0x7fffffff;
}
return x;
}
@@ -1103,7 +1480,7 @@ uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b)
return (a - b) ^ mask;
}
-uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(neon_addl_saturate_s32)(uint64_t a, uint64_t b)
{
uint32_t x, y;
uint32_t low, high;
@@ -1125,7 +1502,7 @@ uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b)
return low | ((uint64_t)high << 32);
}
-uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
+uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b)
{
uint64_t result;
@@ -1137,9 +1514,13 @@ uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b)
return result;
}
-#define DO_ABD(dest, x, y, type) do { \
- type tmp_x = x; \
- type tmp_y = y; \
+/* We have to do the arithmetic in a larger type than
+ * the input type, because for example with a signed 32 bit
+ * op the absolute difference can overflow a signed 32 bit value.
+ */
+#define DO_ABD(dest, x, y, intype, arithtype) do { \
+ arithtype tmp_x = (intype)(x); \
+ arithtype tmp_y = (intype)(y); \
dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
} while(0)
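The point of the extra arithtype parameter is easy to show in isolation. A hedged sketch of the signed 32-bit case (hypothetical name), where the difference itself needs 64 bits:

#include <stdint.h>

/* Illustrative only: |x - y| for signed 32-bit inputs.  For example
 * INT32_MAX - INT32_MIN is 4294967295, which does not fit in int32_t,
 * so the subtraction is done after widening to int64_t. */
static uint64_t abd_s32(int32_t x, int32_t y)
{
    int64_t tx = x;
    int64_t ty = y;
    return (uint64_t)(tx > ty ? tx - ty : ty - tx);
}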
@@ -1147,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
{
uint64_t tmp;
uint64_t result;
- DO_ABD(result, a, b, uint8_t);
- DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
+ DO_ABD(result, a, b, uint8_t, uint32_t);
+ DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
result |= tmp << 16;
- DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
+ DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
result |= tmp << 32;
- DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
+ DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
result |= tmp << 48;
return result;
}
@@ -1161,12 +1542,12 @@ uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
{
uint64_t tmp;
uint64_t result;
- DO_ABD(result, a, b, int8_t);
- DO_ABD(tmp, a >> 8, b >> 8, int8_t);
+ DO_ABD(result, a, b, int8_t, int32_t);
+ DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t);
result |= tmp << 16;
- DO_ABD(tmp, a >> 16, b >> 16, int8_t);
+ DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t);
result |= tmp << 32;
- DO_ABD(tmp, a >> 24, b >> 24, int8_t);
+ DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t);
result |= tmp << 48;
return result;
}
@@ -1175,8 +1556,8 @@ uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
{
uint64_t tmp;
uint64_t result;
- DO_ABD(result, a, b, uint16_t);
- DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
+ DO_ABD(result, a, b, uint16_t, uint32_t);
+ DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
return result | (tmp << 32);
}
@@ -1184,22 +1565,22 @@ uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
{
uint64_t tmp;
uint64_t result;
- DO_ABD(result, a, b, int16_t);
- DO_ABD(tmp, a >> 16, b >> 16, int16_t);
+ DO_ABD(result, a, b, int16_t, int32_t);
+ DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t);
return result | (tmp << 32);
}
uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
{
uint64_t result;
- DO_ABD(result, a, b, uint32_t);
+ DO_ABD(result, a, b, uint32_t, uint64_t);
return result;
}
uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
{
uint64_t result;
- DO_ABD(result, a, b, int32_t);
+ DO_ABD(result, a, b, int32_t, int64_t);
return result;
}
#undef DO_ABD
@@ -1275,7 +1656,6 @@ uint64_t HELPER(neon_negl_u16)(uint64_t x)
return result;
}
-#include <stdio.h>
uint64_t HELPER(neon_negl_u32)(uint64_t x)
{
uint32_t low = -x;
@@ -1298,7 +1678,7 @@ uint64_t HELPER(neon_negl_u64)(uint64_t x)
} else if (x < 0) { \
x = -x; \
}} while (0)
-uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qabs_s8)(uint32_t x)
{
neon_s8 vec;
NEON_UNPACK(neon_s8, vec, x);
@@ -1318,7 +1698,7 @@ uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x)
} else { \
x = -x; \
}} while (0)
-uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qneg_s8)(uint32_t x)
{
neon_s8 vec;
NEON_UNPACK(neon_s8, vec, x);
@@ -1338,7 +1718,7 @@ uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x)
} else if (x < 0) { \
x = -x; \
}} while (0)
-uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qabs_s16)(uint32_t x)
{
neon_s16 vec;
NEON_UNPACK(neon_s16, vec, x);
@@ -1356,7 +1736,7 @@ uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x)
} else { \
x = -x; \
}} while (0)
-uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qneg_s16)(uint32_t x)
{
neon_s16 vec;
NEON_UNPACK(neon_s16, vec, x);
@@ -1367,7 +1747,7 @@ uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x)
}
#undef DO_QNEG16
-uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qabs_s32)(uint32_t x)
{
if (x == SIGNBIT) {
SET_QC();
@@ -1378,7 +1758,7 @@ uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x)
return x;
}
-uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x)
+uint32_t HELPER(neon_qneg_s32)(uint32_t x)
{
if (x == SIGNBIT) {
SET_QC();
@@ -1392,66 +1772,251 @@ uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x)
/* NEON Float helpers. */
uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b)
{
- float32 f0 = vfp_itos(a);
- float32 f1 = vfp_itos(b);
- return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b;
+ return float32_val(float32_min(make_float32(a), make_float32(b), NFS));
}
uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b)
{
- float32 f0 = vfp_itos(a);
- float32 f1 = vfp_itos(b);
- return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b;
+ return float32_val(float32_max(make_float32(a), make_float32(b), NFS));
}
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b)
{
- float32 f0 = vfp_itos(a);
- float32 f1 = vfp_itos(b);
- return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1)
- ? float32_sub(f0, f1, NFS)
- : float32_sub(f1, f0, NFS));
+ float32 f0 = make_float32(a);
+ float32 f1 = make_float32(b);
+ return float32_val(float32_abs(float32_sub(f0, f1, NFS)));
}
uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b)
{
- return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS));
+ return float32_val(float32_add(make_float32(a), make_float32(b), NFS));
}
uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b)
{
- return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS));
+ return float32_val(float32_sub(make_float32(a), make_float32(b), NFS));
}
uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b)
{
- return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS));
+ return float32_val(float32_mul(make_float32(a), make_float32(b), NFS));
}
-/* Floating point comparisons produce an integer result. */
-#define NEON_VOP_FCMP(name, cmp) \
-uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \
-{ \
- if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \
- return ~0; \
- else \
- return 0; \
+/* Floating point comparisons produce an integer result.
+ * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
+ * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
+ */
+uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b)
+{
+ return -float32_eq_quiet(make_float32(a), make_float32(b), NFS);
+}
+
+uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b)
+{
+ return -float32_le(make_float32(b), make_float32(a), NFS);
}
-NEON_VOP_FCMP(ceq_f32, ==)
-NEON_VOP_FCMP(cge_f32, >=)
-NEON_VOP_FCMP(cgt_f32, >)
+uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b)
+{
+ return -float32_lt(make_float32(b), make_float32(a), NFS);
+}
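The unary minus in the comparison helpers above is the whole conversion. A trivial standalone sketch of turning the softfloat 0/1 result into the all-zeros/all-ones mask Neon expects:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t true_mask  = -(uint32_t)1;   /* predicate returned 1 */
    uint32_t false_mask = -(uint32_t)0;   /* predicate returned 0 */
    assert(true_mask == 0xffffffffu);
    assert(false_mask == 0u);
    return 0;
}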
uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b)
{
- float32 f0 = float32_abs(vfp_itos(a));
- float32 f1 = float32_abs(vfp_itos(b));
- return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0;
+ float32 f0 = float32_abs(make_float32(a));
+ float32 f1 = float32_abs(make_float32(b));
+ return -float32_le(f1, f0, NFS);
}
uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b)
{
- float32 f0 = float32_abs(vfp_itos(a));
- float32 f1 = float32_abs(vfp_itos(b));
- return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0;
+ float32 f0 = float32_abs(make_float32(a));
+ float32 f1 = float32_abs(make_float32(b));
+ return -float32_lt(f1, f0, NFS);
+}
+
+#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))
+
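ELEM() simply isolates one lane of a packed 64-bit register value. A small usage sketch with arbitrarily chosen values:

#include <assert.h>
#include <stdint.h>

#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1))

int main(void)
{
    uint64_t v = 0x1122334455667788ull;
    assert(ELEM(v, 0, 8)  == 0x88);      /* lowest byte (lane 0) */
    assert(ELEM(v, 7, 8)  == 0x11);      /* highest byte (lane 7) */
    assert(ELEM(v, 1, 16) == 0x5566);    /* 16-bit lane 1 */
    return 0;
}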
+void HELPER(neon_qunzip8)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8)
+ | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24)
+ | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40)
+ | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56);
+ uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8)
+ | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24)
+ | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40)
+ | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56);
+ uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8)
+ | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24)
+ | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40)
+ | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56);
+ uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8)
+ | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24)
+ | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40)
+ | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_qunzip16)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16)
+ | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48);
+ uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16)
+ | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48);
+ uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16)
+ | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48);
+ uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16)
+ | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_qunzip32)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32);
+ uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32);
+ uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32);
+ uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_unzip8)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm = float64_val(env->vfp.regs[rm]);
+ uint64_t zd = float64_val(env->vfp.regs[rd]);
+ uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8)
+ | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24)
+ | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40)
+ | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56);
+ uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8)
+ | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24)
+ | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40)
+ | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rd] = make_float64(d0);
+}
+
+void HELPER(neon_unzip16)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm = float64_val(env->vfp.regs[rm]);
+ uint64_t zd = float64_val(env->vfp.regs[rd]);
+ uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16)
+ | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48);
+ uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16)
+ | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rd] = make_float64(d0);
+}
+
+void HELPER(neon_qzip8)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8)
+ | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24)
+ | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40)
+ | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56);
+ uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8)
+ | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24)
+ | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40)
+ | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56);
+ uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8)
+ | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24)
+ | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40)
+ | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56);
+ uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8)
+ | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24)
+ | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40)
+ | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_qzip16)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16)
+ | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48);
+ uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16)
+ | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48);
+ uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16)
+ | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48);
+ uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16)
+ | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_qzip32)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm0 = float64_val(env->vfp.regs[rm]);
+ uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]);
+ uint64_t zd0 = float64_val(env->vfp.regs[rd]);
+ uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]);
+ uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32);
+ uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32);
+ uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32);
+ uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rm + 1] = make_float64(m1);
+ env->vfp.regs[rd] = make_float64(d0);
+ env->vfp.regs[rd + 1] = make_float64(d1);
+}
+
+void HELPER(neon_zip8)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm = float64_val(env->vfp.regs[rm]);
+ uint64_t zd = float64_val(env->vfp.regs[rd]);
+ uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8)
+ | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24)
+ | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40)
+ | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56);
+ uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8)
+ | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24)
+ | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40)
+ | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rd] = make_float64(d0);
+}
+
+void HELPER(neon_zip16)(uint32_t rd, uint32_t rm)
+{
+ uint64_t zm = float64_val(env->vfp.regs[rm]);
+ uint64_t zd = float64_val(env->vfp.regs[rd]);
+ uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16)
+ | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48);
+ uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16)
+ | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48);
+ env->vfp.regs[rm] = make_float64(m0);
+ env->vfp.regs[rd] = make_float64(d0);
}
diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h
index 29f77ba..c02c92a 100644
--- a/target-arm/op_addsub.h
+++ b/target-arm/op_addsub.h
@@ -73,8 +73,8 @@ uint32_t HELPER(glue(PFX,subaddx))(uint32_t a, uint32_t b GE_ARG)
uint32_t res = 0;
DECLARE_GE;
- ADD16(a, b, 0);
- SUB16(a >> 16, b >> 16, 1);
+ ADD16(a, b >> 16, 0);
+ SUB16(a >> 16, b, 1);
SET_GE;
return res;
}
@@ -84,8 +84,8 @@ uint32_t HELPER(glue(PFX,addsubx))(uint32_t a, uint32_t b GE_ARG)
uint32_t res = 0;
DECLARE_GE;
- SUB16(a, b, 0);
- ADD16(a >> 16, b >> 16, 1);
+ SUB16(a, b >> 16, 0);
+ ADD16(a >> 16, b, 1);
SET_GE;
return res;
}
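The two hunks above fix which halfword of the second operand feeds each lane. Ignoring the GE flags and the saturating variants, the exchange-form add/subtract computes the following (hypothetical plain-C sketch, cf. ARM UASX/SASX):

#include <stdint.h>

/* Illustrative only: low result lane = a.lo - b.hi, high result lane =
 * a.hi + b.lo, each halfword wrapping modulo 2^16. */
static uint32_t addsubx_u16(uint32_t a, uint32_t b)
{
    uint16_t lo = (uint16_t)(a & 0xffff) - (uint16_t)(b >> 16);
    uint16_t hi = (uint16_t)(a >> 16)    + (uint16_t)(b & 0xffff);
    return (uint32_t)lo | ((uint32_t)hi << 16);
}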
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index fc06536..ec6e5cc 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -14,11 +14,10 @@
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "exec.h"
-#include "helpers.h"
+#include "helper.h"
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
@@ -29,20 +28,6 @@ void raise_exception(int tt)
cpu_loop_exit();
}
-/* thread support */
-
-static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
-
-void cpu_lock(void)
-{
- spin_lock(&global_cpu_lock);
-}
-
-void cpu_unlock(void)
-{
- spin_unlock(&global_cpu_lock);
-}
-
uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
uint32_t rn, uint32_t maxindex)
{
@@ -67,12 +52,6 @@ uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
#if !defined(CONFIG_USER_ONLY)
-//#define ALIGNED_ONLY 1
-
-#if ALIGNED_ONLY == 1
-static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr);
-#endif
-
#define MMUSUFFIX _mmu
#define SHIFT 0
@@ -87,21 +66,6 @@ static void do_unaligned_access (target_ulong addr, int is_write, int is_user, v
#define SHIFT 3
#include "softmmu_template.h"
-#if ALIGNED_ONLY == 1
-static void do_unaligned_access (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
-{
- //printf("::UNALIGNED:: addr=%lx is_write=%d is_user=%d retaddr=%p\n", addr, is_write, is_user, retaddr);
- if (mmu_idx)
- {
- env = cpu_single_env;
- env->cp15.c5_data = 0x00000001; /* corresponds to an alignment fault */
- env->cp15.c6_data = addr;
- env->exception_index = EXCP_DATA_ABORT;
- cpu_loop_exit();
- }
-}
-#endif
-
/* try to fill the TLB and return an exception if error. If retaddr is
NULL, it means that the function was called in C code (i.e. not
from generated code or from helper.c) */
@@ -134,6 +98,47 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
env = saved_env;
}
+void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
+{
+ int cp_num = (insn >> 8) & 0xf;
+ int cp_info = (insn >> 5) & 7;
+ int src = (insn >> 16) & 0xf;
+ int operand = insn & 0xf;
+
+ if (env->cp[cp_num].cp_write)
+ env->cp[cp_num].cp_write(env->cp[cp_num].opaque,
+ cp_info, src, operand, val, GETPC());
+ }
+
+uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
+{
+ int cp_num = (insn >> 8) & 0xf;
+ int cp_info = (insn >> 5) & 7;
+ int dest = (insn >> 16) & 0xf;
+ int operand = insn & 0xf;
+
+ if (env->cp[cp_num].cp_read)
+ return env->cp[cp_num].cp_read(env->cp[cp_num].opaque,
+ cp_info, dest, operand, GETPC());
+ return 0;
+}
+
+#else
+
+void HELPER(set_cp)(CPUState *env, uint32_t insn, uint32_t val)
+{
+ int op1 = (insn >> 8) & 0xf;
+ cpu_abort(env, "cp%i insn %08x\n", op1, insn);
+ return;
+}
+
+uint32_t HELPER(get_cp)(CPUState *env, uint32_t insn)
+{
+ int op1 = (insn >> 8) & 0xf;
+ cpu_abort(env, "cp%i insn %08x\n", op1, insn);
+ return 0;
+}
+
#endif
/* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating
@@ -402,14 +407,6 @@ uint32_t HELPER(sar)(uint32_t x, uint32_t i)
return (int32_t)x >> shift;
}
-uint32_t HELPER(ror)(uint32_t x, uint32_t i)
-{
- int shift = i & 0xff;
- if (shift == 0)
- return x;
- return (x >> shift) | (x << (32 - shift));
-}
-
uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
{
int shift = i & 0xff;
@@ -470,109 +467,126 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
}
}
-uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2)
-{
- uint64_t res;
-
- res = src1 + src2;
- if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
- env->QF = 1;
- res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
- }
- return res;
-}
-
-uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2)
-{
- uint64_t res;
-
- res = src1 + src2;
- if (res < src1) {
- env->QF = 1;
- res = ~(uint64_t)0;
- }
- return res;
-}
-
-uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2)
-{
- uint64_t res;
-
- res = src1 - src2;
- if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
- env->QF = 1;
- res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
- }
- return res;
-}
-
-uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
-{
- uint64_t res;
-
- if (src1 < src2) {
- env->QF = 1;
- res = 0;
- } else {
- res = src1 - src2;
+void HELPER(neon_vldst_all)(uint32_t insn)
+{
+#if defined(CONFIG_USER_ONLY)
+#define LDB(addr) ldub(addr)
+#define LDW(addr) lduw(addr)
+#define LDL(addr) ldl(addr)
+#define LDQ(addr) ldq(addr)
+#define STB(addr, val) stb(addr, val)
+#define STW(addr, val) stw(addr, val)
+#define STL(addr, val) stl(addr, val)
+#define STQ(addr, val) stq(addr, val)
+#else
+ int user = cpu_mmu_index(env);
+#define LDB(addr) slow_ldb_mmu(addr, user, GETPC())
+#define LDW(addr) slow_ldw_mmu(addr, user, GETPC())
+#define LDL(addr) slow_ldl_mmu(addr, user, GETPC())
+#define LDQ(addr) slow_ldq_mmu(addr, user, GETPC())
+#define STB(addr, val) slow_stb_mmu(addr, val, user, GETPC())
+#define STW(addr, val) slow_stw_mmu(addr, val, user, GETPC())
+#define STL(addr, val) slow_stl_mmu(addr, val, user, GETPC())
+#define STQ(addr, val) slow_stq_mmu(addr, val, user, GETPC())
+#endif
+ static const struct {
+ int nregs;
+ int interleave;
+ int spacing;
+ } neon_ls_element_type[11] = {
+ {4, 4, 1},
+ {4, 4, 2},
+ {4, 1, 1},
+ {4, 2, 1},
+ {3, 3, 1},
+ {3, 3, 2},
+ {3, 1, 1},
+ {1, 1, 1},
+ {2, 2, 1},
+ {2, 2, 2},
+ {2, 1, 1}
+ };
+
+ const int op = (insn >> 8) & 0xf;
+ const int size = (insn >> 6) & 3;
+ int rd = ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10);
+ const int rn = (insn >> 16) & 0xf;
+ const int load = (insn & (1 << 21)) != 0;
+ const int nregs = neon_ls_element_type[op].nregs;
+ const int interleave = neon_ls_element_type[op].interleave;
+ const int spacing = neon_ls_element_type[op].spacing;
+ uint32_t addr = env->regs[rn];
+ const int stride = (1 << size) * interleave;
+ int i, reg;
+ uint64_t tmp64;
+
+ for (reg = 0; reg < nregs; reg++) {
+ if (interleave > 2 || (interleave == 2 && nregs == 2)) {
+ addr = env->regs[rn] + (1 << size) * reg;
+ } else if (interleave == 2 && nregs == 4 && reg == 2) {
+ addr = env->regs[rn] + (1 << size);
+ }
+ switch (size) {
+ case 3:
+ if (load) {
+ env->vfp.regs[rd] = make_float64(LDQ(addr));
+ } else {
+ STQ(addr, float64_val(env->vfp.regs[rd]));
+ }
+ addr += stride;
+ break;
+ case 2:
+ if (load) {
+ tmp64 = (uint32_t)LDL(addr);
+ addr += stride;
+ tmp64 |= (uint64_t)LDL(addr) << 32;
+ addr += stride;
+ env->vfp.regs[rd] = make_float64(tmp64);
+ } else {
+ tmp64 = float64_val(env->vfp.regs[rd]);
+ STL(addr, tmp64);
+ addr += stride;
+ STL(addr, tmp64 >> 32);
+ addr += stride;
+ }
+ break;
+ case 1:
+ if (load) {
+ tmp64 = 0ull;
+ for (i = 0; i < 4; i++, addr += stride) {
+ tmp64 |= (uint64_t)LDW(addr) << (i * 16);
+ }
+ env->vfp.regs[rd] = make_float64(tmp64);
+ } else {
+ tmp64 = float64_val(env->vfp.regs[rd]);
+ for (i = 0; i < 4; i++, addr += stride, tmp64 >>= 16) {
+ STW(addr, tmp64);
+ }
+ }
+ break;
+ case 0:
+ if (load) {
+ tmp64 = 0ull;
+ for (i = 0; i < 8; i++, addr += stride) {
+ tmp64 |= (uint64_t)LDB(addr) << (i * 8);
+ }
+ env->vfp.regs[rd] = make_float64(tmp64);
+ } else {
+ tmp64 = float64_val(env->vfp.regs[rd]);
+ for (i = 0; i < 8; i++, addr += stride, tmp64 >>= 8) {
+ STB(addr, tmp64);
+ }
+ }
+ break;
+ }
+ rd += spacing;
}
- return res;
-}
-
-/* These need to return a pair of value, so still use T0/T1. */
-/* Transpose. Argument order is rather strange to avoid special casing
- the tranlation code.
- On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */
-void HELPER(neon_trn_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
- rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_trn_u16)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 << 16) | (T1 & 0xffff);
- rm = (T1 >> 16) | (T0 & 0xffff0000);
- T0 = rd;
- T1 = rm;
-}
-
-/* Worker routines for zip and unzip. */
-void HELPER(neon_unzip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
- | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
- rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
- | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
- | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
- rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
- | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u16)(void)
-{
- uint32_t tmp;
-
- tmp = (T0 & 0xffff) | (T1 << 16);
- T1 = (T1 & 0xffff0000) | (T0 >> 16);
- T0 = tmp;
+#undef LDB
+#undef LDW
+#undef LDL
+#undef LDQ
+#undef STB
+#undef STW
+#undef STL
+#undef STQ
}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 1e189f8..05712b8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -30,10 +30,14 @@
#include "tcg-op.h"
#include "qemu-log.h"
-#include "helpers.h"
+#include "helper.h"
#define GEN_HELPER 1
-#include "helpers.h"
+#include "helper.h"
+#define ENABLE_ARCH_4T arm_feature(env, ARM_FEATURE_V4T)
+#define ENABLE_ARCH_5 arm_feature(env, ARM_FEATURE_V5)
+/* currently all emulated v5 cores are also v5TE, so don't bother */
+#define ENABLE_ARCH_5TE arm_feature(env, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J 0
#define ENABLE_ARCH_6 arm_feature(env, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K arm_feature(env, ARM_FEATURE_V6K)
@@ -53,13 +57,15 @@ typedef struct DisasContext {
/* Thumb-2 conditional execution bits. */
int condexec_mask;
int condexec_cond;
- int condexec_mask_prev; /* mask at start of instruction/block */
struct TranslationBlock *tb;
int singlestep_enabled;
int thumb;
#if !defined(CONFIG_USER_ONLY)
int user;
#endif
+ int vfp_enabled;
+ int vec_len;
+ int vec_stride;
#ifdef CONFIG_MEMCHECK
int search_pc;
#endif
@@ -67,6 +73,8 @@ typedef struct DisasContext {
#include "translate-android.h"
+static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
+
#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
@@ -77,74 +85,62 @@ typedef struct DisasContext {
conditional execution state has been updated. */
#define DISAS_WFI 4
#define DISAS_SWI 5
+#define DISAS_SMC 6
static TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency. */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
+static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_exclusive_addr;
+static TCGv_i32 cpu_exclusive_val;
+static TCGv_i32 cpu_exclusive_high;
+#ifdef CONFIG_USER_ONLY
+static TCGv_i32 cpu_exclusive_test;
+static TCGv_i32 cpu_exclusive_info;
+#endif
/* FIXME: These should be removed. */
-static TCGv cpu_T[2];
static TCGv cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;
-#define ICOUNT_TEMP cpu_T[0]
#include "gen-icount.h"
+static const char *regnames[] =
+ { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
+
/* initialize TCG globals. */
void arm_translate_init(void)
{
+ int i;
+
cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
- cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
- cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");
+ for (i = 0; i < 16; i++) {
+ cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, regs[i]),
+ regnames[i]);
+ }
+ cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, exclusive_addr), "exclusive_addr");
+ cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, exclusive_val), "exclusive_val");
+ cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, exclusive_high), "exclusive_high");
+#ifdef CONFIG_USER_ONLY
+ cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, exclusive_test), "exclusive_test");
+ cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUState, exclusive_info), "exclusive_info");
+#endif
#define GEN_HELPER 2
-#include "helpers.h"
-}
-
-/* The code generator doesn't like lots of temporaries, so maintain our own
- cache for reuse within a function. */
-#define MAX_TEMPS 8
-static int num_temps;
-static TCGv temps[MAX_TEMPS];
-
-/* Allocate a temporary variable. */
-static TCGv_i32 new_tmp(void)
-{
- TCGv tmp;
- if (num_temps == MAX_TEMPS)
- abort();
-
- if (GET_TCGV_I32(temps[num_temps]))
- return temps[num_temps++];
-
- tmp = tcg_temp_new_i32();
- temps[num_temps++] = tmp;
- return tmp;
-}
-
-/* Release a temporary variable. */
-static void dead_tmp(TCGv tmp)
-{
- int i;
- num_temps--;
- i = num_temps;
- if (TCGV_EQUAL(temps[i], tmp))
- return;
-
- /* Shuffle this temp to the last slot. */
- while (!TCGV_EQUAL(temps[i], tmp))
- i--;
- while (i < num_temps) {
- temps[i] = temps[i + 1];
- i++;
- }
- temps[i] = tmp;
+#include "helper.h"
}
static inline TCGv load_cpu_offset(int offset)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_ld_i32(tmp, cpu_env, offset);
return tmp;
}
@@ -154,7 +150,7 @@ static inline TCGv load_cpu_offset(int offset)
static inline void store_cpu_offset(TCGv var, int offset)
{
tcg_gen_st_i32(var, cpu_env, offset);
- dead_tmp(var);
+ tcg_temp_free_i32(var);
}
#define store_cpu_field(var, name) \
@@ -172,14 +168,14 @@ static void load_reg_var(DisasContext *s, TCGv var, int reg)
addr = (long)s->pc + 4;
tcg_gen_movi_i32(var, addr);
} else {
- tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
+ tcg_gen_mov_i32(var, cpu_R[reg]);
}
}
/* Create a new temporary and set it to the value of a CPU register. */
static inline TCGv load_reg(DisasContext *s, int reg)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
load_reg_var(s, tmp, reg);
return tmp;
}
@@ -192,38 +188,10 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
tcg_gen_andi_i32(var, var, ~1);
s->is_jmp = DISAS_JUMP;
}
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
- dead_tmp(var);
+ tcg_gen_mov_i32(cpu_R[reg], var);
+ tcg_temp_free_i32(var);
}
-
-/* Basic operations. */
-#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
-#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
-#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
-
-#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
-
-#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
-
-#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
-#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
-#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
-#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
-
-#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
-
/* Value extensions. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
@@ -233,57 +201,57 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
-#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
+static inline void gen_set_cpsr(TCGv var, uint32_t mask)
+{
+ TCGv tmp_mask = tcg_const_i32(mask);
+ gen_helper_cpsr_write(var, tmp_mask);
+ tcg_temp_free_i32(tmp_mask);
+}
/* Set NZCV flags from the high 4 bits of var. */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
static void gen_exception(int excp)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, excp);
gen_helper_exception(tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
static void gen_smul_dual(TCGv a, TCGv b)
{
- TCGv tmp1 = new_tmp();
- TCGv tmp2 = new_tmp();
+ TCGv tmp1 = tcg_temp_new_i32();
+ TCGv tmp2 = tcg_temp_new_i32();
tcg_gen_ext16s_i32(tmp1, a);
tcg_gen_ext16s_i32(tmp2, b);
tcg_gen_mul_i32(tmp1, tmp1, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_sari_i32(a, a, 16);
tcg_gen_sari_i32(b, b, 16);
tcg_gen_mul_i32(b, b, a);
tcg_gen_mov_i32(a, tmp1);
- dead_tmp(tmp1);
+ tcg_temp_free_i32(tmp1);
}
/* Byteswap each halfword. */
static void gen_rev16(TCGv var)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_shri_i32(tmp, var, 8);
tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
tcg_gen_shli_i32(var, var, 8);
tcg_gen_andi_i32(var, var, 0xff00ff00);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
/* Byteswap low halfword and sign extend. */
static void gen_revsh(TCGv var)
{
- TCGv tmp = new_tmp();
- tcg_gen_shri_i32(tmp, var, 8);
- tcg_gen_andi_i32(tmp, tmp, 0x00ff);
- tcg_gen_shli_i32(var, var, 8);
- tcg_gen_ext8s_i32(var, var);
- tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_gen_ext16u_i32(var, var);
+ tcg_gen_bswap16_i32(var, var);
+ tcg_gen_ext16s_i32(var, var);
}
/* Unsigned bitfield extract. */
@@ -318,11 +286,32 @@ static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
tcg_gen_or_i32(dest, base, val);
}
-/* Round the top 32 bits of a 64-bit value. */
-static void gen_roundqd(TCGv a, TCGv b)
+/* Return (b << 32) + a. Mark inputs as dead */
+static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
{
- tcg_gen_shri_i32(a, a, 31);
- tcg_gen_add_i32(a, a, b);
+ TCGv_i64 tmp64 = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(tmp64, b);
+ tcg_temp_free_i32(b);
+ tcg_gen_shli_i64(tmp64, tmp64, 32);
+ tcg_gen_add_i64(a, tmp64, a);
+
+ tcg_temp_free_i64(tmp64);
+ return a;
+}
+
+/* Return (b << 32) - a. Mark inputs as dead. */
+static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b)
+{
+ TCGv_i64 tmp64 = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(tmp64, b);
+ tcg_temp_free_i32(b);
+ tcg_gen_shli_i64(tmp64, tmp64, 32);
+ tcg_gen_sub_i64(a, tmp64, a);
+
+ tcg_temp_free_i64(tmp64);
+ return a;
}
/* FIXME: Most targets have native widening multiplication.
@@ -334,10 +323,11 @@ static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
TCGv_i64 tmp2 = tcg_temp_new_i64();
tcg_gen_extu_i32_i64(tmp1, a);
- dead_tmp(a);
+ tcg_temp_free_i32(a);
tcg_gen_extu_i32_i64(tmp2, b);
- dead_tmp(b);
+ tcg_temp_free_i32(b);
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
+ tcg_temp_free_i64(tmp2);
return tmp1;
}
@@ -347,50 +337,22 @@ static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
TCGv_i64 tmp2 = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(tmp1, a);
- dead_tmp(a);
+ tcg_temp_free_i32(a);
tcg_gen_ext_i32_i64(tmp2, b);
- dead_tmp(b);
+ tcg_temp_free_i32(b);
tcg_gen_mul_i64(tmp1, tmp1, tmp2);
+ tcg_temp_free_i64(tmp2);
return tmp1;
}
-/* Unsigned 32x32->64 multiply. */
-static void gen_op_mull_T0_T1(void)
-{
- TCGv_i64 tmp1 = tcg_temp_new_i64();
- TCGv_i64 tmp2 = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
- tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
- tcg_gen_mul_i64(tmp1, tmp1, tmp2);
- tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
- tcg_gen_shri_i64(tmp1, tmp1, 32);
- tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
-}
-
-/* Signed 32x32->64 multiply. */
-static void gen_imull(TCGv a, TCGv b)
-{
- TCGv_i64 tmp1 = tcg_temp_new_i64();
- TCGv_i64 tmp2 = tcg_temp_new_i64();
-
- tcg_gen_ext_i32_i64(tmp1, a);
- tcg_gen_ext_i32_i64(tmp2, b);
- tcg_gen_mul_i64(tmp1, tmp1, tmp2);
- tcg_gen_trunc_i64_i32(a, tmp1);
- tcg_gen_shri_i64(tmp1, tmp1, 32);
- tcg_gen_trunc_i64_i32(b, tmp1);
-}
-#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
-
/* Swap low and high halfwords. */
static void gen_swap_half(TCGv var)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_shri_i32(tmp, var, 16);
tcg_gen_shli_i32(var, var, 16);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
@@ -402,15 +364,15 @@ static void gen_swap_half(TCGv var)
static void gen_add16(TCGv t0, TCGv t1)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_xor_i32(tmp, t0, t1);
tcg_gen_andi_i32(tmp, tmp, 0x8000);
tcg_gen_andi_i32(t0, t0, ~0x8000);
tcg_gen_andi_i32(t1, t1, ~0x8000);
tcg_gen_add_i32(t0, t0, t1);
tcg_gen_xor_i32(t0, t0, tmp);
- dead_tmp(tmp);
- dead_tmp(t1);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(t1);
}
#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
@@ -418,10 +380,10 @@ static void gen_add16(TCGv t0, TCGv t1)
/* Set CF to the top bit of var. */
static void gen_set_CF_bit31(TCGv var)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_shri_i32(tmp, var, 31);
gen_set_CF(tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
/* Set N and Z flags from var. */
@@ -432,13 +394,13 @@ static inline void gen_logic_CC(TCGv var)
}
/* T0 += T1 + CF. */
-static void gen_adc_T0_T1(void)
+static void gen_adc(TCGv t0, TCGv t1)
{
TCGv tmp;
- gen_op_addl_T0_T1();
+ tcg_gen_add_i32(t0, t0, t1);
tmp = load_cpu_field(CF);
- tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
- dead_tmp(tmp);
+ tcg_gen_add_i32(t0, t0, tmp);
+ tcg_temp_free_i32(tmp);
}
/* dest = T0 + T1 + CF. */
@@ -448,7 +410,7 @@ static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
tcg_gen_add_i32(dest, t0, t1);
tmp = load_cpu_field(CF);
tcg_gen_add_i32(dest, dest, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
/* dest = T0 - T1 + CF - 1. */
@@ -459,48 +421,15 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
tmp = load_cpu_field(CF);
tcg_gen_add_i32(dest, dest, tmp);
tcg_gen_subi_i32(dest, dest, 1);
- dead_tmp(tmp);
-}
-
-#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])
-
-/* T0 &= ~T1. Clobbers T1. */
-/* FIXME: Implement bic natively. */
-static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
-{
- TCGv tmp = new_tmp();
- tcg_gen_not_i32(tmp, t1);
- tcg_gen_and_i32(dest, t0, tmp);
- dead_tmp(tmp);
-}
-static inline void gen_op_bicl_T0_T1(void)
-{
- gen_op_notl_T1();
- gen_op_andl_T0_T1();
+ tcg_temp_free_i32(tmp);
}
/* FIXME: Implement this natively. */
#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
-/* FIXME: Implement this natively. */
-static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
-{
- TCGv tmp;
-
- if (i == 0)
- return;
-
- tmp = new_tmp();
- tcg_gen_shri_i32(tmp, t1, i);
- tcg_gen_shli_i32(t1, t1, 32 - i);
- tcg_gen_or_i32(t0, t1, tmp);
- dead_tmp(tmp);
-}
-
static void shifter_out_im(TCGv var, int shift)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
if (shift == 0) {
tcg_gen_andi_i32(tmp, var, 1);
} else {
@@ -509,7 +438,7 @@ static void shifter_out_im(TCGv var, int shift)
tcg_gen_andi_i32(tmp, tmp, 1);
}
gen_set_CF(tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
/* Shift by immediate. Includes special handling for shift == 0. */
@@ -549,7 +478,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
if (shift != 0) {
if (flags)
shifter_out_im(var, shift - 1);
- tcg_gen_rori_i32(var, var, shift); break;
+ tcg_gen_rotri_i32(var, var, shift); break;
} else {
TCGv tmp = load_cpu_field(CF);
if (flags)
@@ -557,7 +486,7 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
tcg_gen_shri_i32(var, var, 1);
tcg_gen_shli_i32(tmp, tmp, 31);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
}
};
@@ -577,10 +506,11 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop,
case 0: gen_helper_shl(var, var, shift); break;
case 1: gen_helper_shr(var, var, shift); break;
case 2: gen_helper_sar(var, var, shift); break;
- case 3: gen_helper_ror(var, var, shift); break;
+ case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
+ tcg_gen_rotr_i32(var, var, shift); break;
}
}
- dead_tmp(shift);
+ tcg_temp_free_i32(shift);
}
#define PAS_OP(pfx) \
@@ -602,11 +532,13 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
tmp = tcg_temp_new_ptr();
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
PAS_OP(s)
+ tcg_temp_free_ptr(tmp);
break;
case 5:
tmp = tcg_temp_new_ptr();
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
PAS_OP(u)
+ tcg_temp_free_ptr(tmp);
break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
@@ -629,7 +561,7 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
#define PAS_OP(pfx) \
- switch (op2) { \
+ switch (op1) { \
case 0: gen_pas_helper(glue(pfx,add8)); break; \
case 1: gen_pas_helper(glue(pfx,add16)); break; \
case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
@@ -641,17 +573,19 @@ static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
TCGv_ptr tmp;
- switch (op1) {
+ switch (op2) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
case 0:
tmp = tcg_temp_new_ptr();
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
PAS_OP(s)
+ tcg_temp_free_ptr(tmp);
break;
case 4:
tmp = tcg_temp_new_ptr();
tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
PAS_OP(u)
+ tcg_temp_free_ptr(tmp);
break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
@@ -715,7 +649,7 @@ static void gen_test_cc(int cc, int label)
inv = gen_new_label();
tmp = load_cpu_field(CF);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = load_cpu_field(ZF);
tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
gen_set_label(inv);
@@ -723,7 +657,7 @@ static void gen_test_cc(int cc, int label)
case 9: /* ls: !C || Z */
tmp = load_cpu_field(CF);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = load_cpu_field(ZF);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
break;
@@ -731,43 +665,43 @@ static void gen_test_cc(int cc, int label)
tmp = load_cpu_field(VF);
tmp2 = load_cpu_field(NF);
tcg_gen_xor_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
break;
case 11: /* lt: N != V -> N ^ V != 0 */
tmp = load_cpu_field(VF);
tmp2 = load_cpu_field(NF);
tcg_gen_xor_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
break;
case 12: /* gt: !Z && N == V */
inv = gen_new_label();
tmp = load_cpu_field(ZF);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = load_cpu_field(VF);
tmp2 = load_cpu_field(NF);
tcg_gen_xor_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
gen_set_label(inv);
break;
case 13: /* le: Z || N != V */
tmp = load_cpu_field(ZF);
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = load_cpu_field(VF);
tmp2 = load_cpu_field(NF);
tcg_gen_xor_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
break;
default:
fprintf(stderr, "Bad condition code 0x%x\n", cc);
abort();
}
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
static const uint8_t table_logic_cc[16] = {
@@ -795,35 +729,22 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr)
TCGv tmp;
s->is_jmp = DISAS_UPDATE;
- tmp = new_tmp();
if (s->thumb != (addr & 1)) {
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, addr & 1);
tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
+ tcg_temp_free_i32(tmp);
}
- tcg_gen_movi_i32(tmp, addr & ~1);
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
- dead_tmp(tmp);
+ tcg_gen_movi_i32(cpu_R[15], addr & ~1);
}
/* Set PC and Thumb state from var. var is marked as dead. */
static inline void gen_bx(DisasContext *s, TCGv var)
{
- TCGv tmp;
-
s->is_jmp = DISAS_UPDATE;
- tmp = new_tmp();
- tcg_gen_andi_i32(tmp, var, 1);
- store_cpu_field(tmp, thumb);
- tcg_gen_andi_i32(var, var, ~1);
- store_cpu_field(var, regs[15]);
-}
-
-/* TODO: This should be removed. Use gen_bx instead. */
-static inline void gen_bx_T0(DisasContext *s)
-{
- TCGv tmp = new_tmp();
- tcg_gen_mov_i32(tmp, cpu_T[0]);
- gen_bx(s, tmp);
+ tcg_gen_andi_i32(cpu_R[15], var, ~1);
+ tcg_gen_andi_i32(var, var, 1);
+ store_cpu_field(var, thumb);
}
/* Variant of store_reg which uses branch&exchange logic when storing
@@ -839,105 +760,92 @@ static inline void store_reg_bx(CPUState *env, DisasContext *s,
}
}
+/* Variant of store_reg which uses branch&exchange logic when storing
+ * to r15 in ARM architecture v5T and above. This is used for storing
+ * the results of a LDR/LDM/POP into r15, and corresponds to the cases
+ * in the ARM ARM which use the LoadWritePC() pseudocode function. */
+static inline void store_reg_from_load(CPUState *env, DisasContext *s,
+ int reg, TCGv var)
+{
+ if (reg == 15 && ENABLE_ARCH_5) {
+ gen_bx(s, var);
+ } else {
+ store_reg(s, reg, var);
+ }
+}
+
+static inline void gen_smc(CPUState *env, DisasContext *s)
+{
+ tcg_gen_movi_i32(cpu_R[15], s->pc);
+ s->is_jmp = DISAS_SMC;
+}
+
static inline TCGv gen_ld8s(TCGv addr, int index)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_qemu_ld8s(tmp, addr, index);
return tmp;
}
static inline TCGv gen_ld8u(TCGv addr, int index)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_qemu_ld8u(tmp, addr, index);
return tmp;
}
static inline TCGv gen_ld16s(TCGv addr, int index)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_qemu_ld16s(tmp, addr, index);
return tmp;
}
static inline TCGv gen_ld16u(TCGv addr, int index)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_qemu_ld16u(tmp, addr, index);
return tmp;
}
static inline TCGv gen_ld32(TCGv addr, int index)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_qemu_ld32u(tmp, addr, index);
return tmp;
}
+static inline TCGv_i64 gen_ld64(TCGv addr, int index)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_qemu_ld64(tmp, addr, index);
+ return tmp;
+}
static inline void gen_st8(TCGv val, TCGv addr, int index)
{
tcg_gen_qemu_st8(val, addr, index);
- dead_tmp(val);
+ tcg_temp_free_i32(val);
}
static inline void gen_st16(TCGv val, TCGv addr, int index)
{
tcg_gen_qemu_st16(val, addr, index);
- dead_tmp(val);
+ tcg_temp_free_i32(val);
}
static inline void gen_st32(TCGv val, TCGv addr, int index)
{
tcg_gen_qemu_st32(val, addr, index);
- dead_tmp(val);
-}
-
-static inline void gen_movl_T0_reg(DisasContext *s, int reg)
-{
- load_reg_var(s, cpu_T[0], reg);
+ tcg_temp_free_i32(val);
}
-
-static inline void gen_movl_T1_reg(DisasContext *s, int reg)
+static inline void gen_st64(TCGv_i64 val, TCGv addr, int index)
{
- load_reg_var(s, cpu_T[1], reg);
-}
-
-static inline void gen_movl_T2_reg(DisasContext *s, int reg)
-{
- load_reg_var(s, cpu_T[2], reg);
+ tcg_gen_qemu_st64(val, addr, index);
+ tcg_temp_free_i64(val);
}
static inline void gen_set_pc_im(uint32_t val)
{
- TCGv tmp = new_tmp();
- tcg_gen_movi_i32(tmp, val);
- store_cpu_field(tmp, regs[15]);
-}
-
-static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
-{
- TCGv tmp;
- if (reg == 15) {
- tmp = new_tmp();
- tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
- } else {
- tmp = cpu_T[t];
- }
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
- if (reg == 15) {
- dead_tmp(tmp);
- s->is_jmp = DISAS_JUMP;
- }
-}
-
-static inline void gen_movl_reg_T0(DisasContext *s, int reg)
-{
- gen_movl_reg_TN(s, reg, 0);
-}
-
-static inline void gen_movl_reg_T1(DisasContext *s, int reg)
-{
- gen_movl_reg_TN(s, reg, 1);
+ tcg_gen_movi_i32(cpu_R[15], val);
}
/* Force a TB lookup after an instruction that changes the CPU state. */
static inline void gen_lookup_tb(DisasContext *s)
{
- gen_op_movl_T0_im(s->pc);
- gen_movl_reg_T0(s, 15);
+ tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
s->is_jmp = DISAS_UPDATE;
}
@@ -965,7 +873,7 @@ static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
tcg_gen_sub_i32(var, var, offset);
else
tcg_gen_add_i32(var, var, offset);
- dead_tmp(offset);
+ tcg_temp_free_i32(offset);
}
}
@@ -993,7 +901,7 @@ static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
tcg_gen_sub_i32(var, var, offset);
else
tcg_gen_add_i32(var, var, offset);
- dead_tmp(offset);
+ tcg_temp_free_i32(offset);
}
}
@@ -1013,6 +921,26 @@ VFP_OP2(div)
#undef VFP_OP2
+static inline void gen_vfp_F1_mul(int dp)
+{
+ /* Like gen_vfp_mul() but put result in F1 */
+ if (dp) {
+ gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, cpu_env);
+ } else {
+ gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, cpu_env);
+ }
+}
+
+static inline void gen_vfp_F1_neg(int dp)
+{
+ /* Like gen_vfp_neg() but put result in F1 */
+ if (dp) {
+ gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
+ } else {
+ gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
+ }
+}
+
static inline void gen_vfp_abs(int dp)
{
if (dp)
@@ -1061,61 +989,73 @@ static inline void gen_vfp_F1_ld0(int dp)
tcg_gen_movi_i32(cpu_F1s, 0);
}
-static inline void gen_vfp_uito(int dp)
-{
- if (dp)
- gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
- else
- gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
-}
-
-static inline void gen_vfp_sito(int dp)
-{
- if (dp)
- gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
- else
- gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
-}
-
-static inline void gen_vfp_toui(int dp)
-{
- if (dp)
- gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
+#define VFP_GEN_ITOF(name) \
+static inline void gen_vfp_##name(int dp, int neon) \
+{ \
+ TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+ int offset; \
+ if (neon) { \
+ offset = offsetof(CPUState, vfp.standard_fp_status); \
+ } else { \
+ offset = offsetof(CPUState, vfp.fp_status); \
+ } \
+ tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+ if (dp) { \
+ gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
+ } else { \
+ gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
+ } \
+ tcg_temp_free_ptr(statusptr); \
}
-static inline void gen_vfp_touiz(int dp)
-{
- if (dp)
- gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
-}
+VFP_GEN_ITOF(uito)
+VFP_GEN_ITOF(sito)
+#undef VFP_GEN_ITOF
-static inline void gen_vfp_tosi(int dp)
-{
- if (dp)
- gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
+#define VFP_GEN_FTOI(name) \
+static inline void gen_vfp_##name(int dp, int neon) \
+{ \
+ TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+ int offset; \
+ if (neon) { \
+ offset = offsetof(CPUState, vfp.standard_fp_status); \
+ } else { \
+ offset = offsetof(CPUState, vfp.fp_status); \
+ } \
+ tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+ if (dp) { \
+ gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
+ } else { \
+ gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
+ } \
+ tcg_temp_free_ptr(statusptr); \
}
-static inline void gen_vfp_tosiz(int dp)
-{
- if (dp)
- gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
-}
+VFP_GEN_FTOI(toui)
+VFP_GEN_FTOI(touiz)
+VFP_GEN_FTOI(tosi)
+VFP_GEN_FTOI(tosiz)
+#undef VFP_GEN_FTOI
#define VFP_GEN_FIX(name) \
-static inline void gen_vfp_##name(int dp, int shift) \
+static inline void gen_vfp_##name(int dp, int shift, int neon) \
{ \
- if (dp) \
- gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\
- else \
- gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\
+ TCGv tmp_shift = tcg_const_i32(shift); \
+ TCGv_ptr statusptr = tcg_temp_new_ptr(); \
+ int offset; \
+ if (neon) { \
+ offset = offsetof(CPUState, vfp.standard_fp_status); \
+ } else { \
+ offset = offsetof(CPUState, vfp.fp_status); \
+ } \
+ tcg_gen_addi_ptr(statusptr, cpu_env, offset); \
+ if (dp) { \
+ gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
+ } else { \
+ gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
+ } \
+ tcg_temp_free_i32(tmp_shift); \
+ tcg_temp_free_ptr(statusptr); \
}
VFP_GEN_FIX(tosh)
VFP_GEN_FIX(tosl)
@@ -1127,20 +1067,20 @@ VFP_GEN_FIX(uhto)
VFP_GEN_FIX(ulto)
#undef VFP_GEN_FIX
-static inline void gen_vfp_ld(DisasContext *s, int dp)
+static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
{
if (dp)
- tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s));
+ tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
else
- tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s));
+ tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
}
-static inline void gen_vfp_st(DisasContext *s, int dp)
+static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr)
{
if (dp)
- tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s));
+ tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
else
- tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s));
+ tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
}
static inline long
@@ -1167,17 +1107,9 @@ neon_reg_offset (int reg, int n)
return vfp_reg_offset(0, sreg);
}
-/* FIXME: Remove these. */
-#define neon_T0 cpu_T[0]
-#define neon_T1 cpu_T[1]
-#define NEON_GET_REG(T, reg, n) \
- tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
-#define NEON_SET_REG(T, reg, n) \
- tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
-
static TCGv neon_load_reg(int reg, int pass)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
return tmp;
}
@@ -1185,7 +1117,7 @@ static TCGv neon_load_reg(int reg, int pass)
static void neon_store_reg(int reg, int pass, TCGv var)
{
tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
- dead_tmp(var);
+ tcg_temp_free_i32(var);
}
static inline void neon_load_reg64(TCGv_i64 var, int reg)
@@ -1239,19 +1171,17 @@ static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
}
-static inline void gen_op_iwmmxt_movl_wCx_T0(int reg)
-{
- tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
-}
-
-static inline void gen_op_iwmmxt_movl_T0_wCx(int reg)
+static inline TCGv iwmmxt_load_creg(int reg)
{
- tcg_gen_ld_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
+ TCGv var = tcg_temp_new_i32();
+ tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
+ return var;
}
-static inline void gen_op_iwmmxt_movl_T1_wCx(int reg)
+static inline void iwmmxt_store_creg(int reg, TCGv var)
{
- tcg_gen_ld_i32(cpu_T[1], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
+ tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
+ tcg_temp_free_i32(var);
}
static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
@@ -1289,22 +1219,15 @@ static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}
-#define IWMMXT_OP_ENV(name) \
-static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
-{ \
- iwmmxt_load_reg(cpu_V1, rn); \
- gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
-}
-
-#define IWMMXT_OP_ENV_SIZE(name) \
-IWMMXT_OP_ENV(name##b) \
-IWMMXT_OP_ENV(name##w) \
-IWMMXT_OP_ENV(name##l)
+#define IWMMXT_OP_SIZE(name) \
+IWMMXT_OP(name##b) \
+IWMMXT_OP(name##w) \
+IWMMXT_OP(name##l)
-#define IWMMXT_OP_ENV1(name) \
+#define IWMMXT_OP_1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
- gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
+ gen_helper_iwmmxt_##name(cpu_M0, cpu_M0); \
}
IWMMXT_OP(maddsq)
@@ -1318,100 +1241,51 @@ IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)
-IWMMXT_OP_ENV_SIZE(unpackl)
-IWMMXT_OP_ENV_SIZE(unpackh)
-
-IWMMXT_OP_ENV1(unpacklub)
-IWMMXT_OP_ENV1(unpackluw)
-IWMMXT_OP_ENV1(unpacklul)
-IWMMXT_OP_ENV1(unpackhub)
-IWMMXT_OP_ENV1(unpackhuw)
-IWMMXT_OP_ENV1(unpackhul)
-IWMMXT_OP_ENV1(unpacklsb)
-IWMMXT_OP_ENV1(unpacklsw)
-IWMMXT_OP_ENV1(unpacklsl)
-IWMMXT_OP_ENV1(unpackhsb)
-IWMMXT_OP_ENV1(unpackhsw)
-IWMMXT_OP_ENV1(unpackhsl)
-
-IWMMXT_OP_ENV_SIZE(cmpeq)
-IWMMXT_OP_ENV_SIZE(cmpgtu)
-IWMMXT_OP_ENV_SIZE(cmpgts)
-
-IWMMXT_OP_ENV_SIZE(mins)
-IWMMXT_OP_ENV_SIZE(minu)
-IWMMXT_OP_ENV_SIZE(maxs)
-IWMMXT_OP_ENV_SIZE(maxu)
-
-IWMMXT_OP_ENV_SIZE(subn)
-IWMMXT_OP_ENV_SIZE(addn)
-IWMMXT_OP_ENV_SIZE(subu)
-IWMMXT_OP_ENV_SIZE(addu)
-IWMMXT_OP_ENV_SIZE(subs)
-IWMMXT_OP_ENV_SIZE(adds)
-
-IWMMXT_OP_ENV(avgb0)
-IWMMXT_OP_ENV(avgb1)
-IWMMXT_OP_ENV(avgw0)
-IWMMXT_OP_ENV(avgw1)
+IWMMXT_OP_SIZE(unpackl)
+IWMMXT_OP_SIZE(unpackh)
+
+IWMMXT_OP_1(unpacklub)
+IWMMXT_OP_1(unpackluw)
+IWMMXT_OP_1(unpacklul)
+IWMMXT_OP_1(unpackhub)
+IWMMXT_OP_1(unpackhuw)
+IWMMXT_OP_1(unpackhul)
+IWMMXT_OP_1(unpacklsb)
+IWMMXT_OP_1(unpacklsw)
+IWMMXT_OP_1(unpacklsl)
+IWMMXT_OP_1(unpackhsb)
+IWMMXT_OP_1(unpackhsw)
+IWMMXT_OP_1(unpackhsl)
+
+IWMMXT_OP_SIZE(cmpeq)
+IWMMXT_OP_SIZE(cmpgtu)
+IWMMXT_OP_SIZE(cmpgts)
+
+IWMMXT_OP_SIZE(mins)
+IWMMXT_OP_SIZE(minu)
+IWMMXT_OP_SIZE(maxs)
+IWMMXT_OP_SIZE(maxu)
+
+IWMMXT_OP_SIZE(subn)
+IWMMXT_OP_SIZE(addn)
+IWMMXT_OP_SIZE(subu)
+IWMMXT_OP_SIZE(addu)
+IWMMXT_OP_SIZE(subs)
+IWMMXT_OP_SIZE(adds)
+
+IWMMXT_OP(avgb0)
+IWMMXT_OP(avgb1)
+IWMMXT_OP(avgw0)
+IWMMXT_OP(avgw1)
IWMMXT_OP(msadb)
-IWMMXT_OP_ENV(packuw)
-IWMMXT_OP_ENV(packul)
-IWMMXT_OP_ENV(packuq)
-IWMMXT_OP_ENV(packsw)
-IWMMXT_OP_ENV(packsl)
-IWMMXT_OP_ENV(packsq)
-
-static inline void gen_op_iwmmxt_muladdsl_M0_T0_T1(void)
-{
- gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
-}
-
-static inline void gen_op_iwmmxt_muladdsw_M0_T0_T1(void)
-{
- gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
-}
-
-static inline void gen_op_iwmmxt_muladdswl_M0_T0_T1(void)
-{
- gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
-}
-
-static inline void gen_op_iwmmxt_align_M0_T0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V1, rn);
- gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, cpu_T[0]);
-}
-
-static inline void gen_op_iwmmxt_insr_M0_T0_T1(int shift)
-{
- TCGv tmp = tcg_const_i32(shift);
- gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1], tmp);
-}
-
-static inline void gen_op_iwmmxt_extrsb_T0_M0(int shift)
-{
- tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
- tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
- tcg_gen_ext8s_i32(cpu_T[0], cpu_T[0]);
-}
-
-static inline void gen_op_iwmmxt_extrsw_T0_M0(int shift)
-{
- tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
- tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
- tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]);
-}
-
-static inline void gen_op_iwmmxt_extru_T0_M0(int shift, uint32_t mask)
-{
- tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
- tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
- if (mask != ~0u)
- tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
-}
+IWMMXT_OP(packuw)
+IWMMXT_OP(packul)
+IWMMXT_OP(packuq)
+IWMMXT_OP(packsw)
+IWMMXT_OP(packsl)
+IWMMXT_OP(packsq)
static void gen_op_iwmmxt_set_mup(void)
{
@@ -1431,7 +1305,7 @@ static void gen_op_iwmmxt_set_cup(void)
static void gen_op_iwmmxt_setpsr_nz(void)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}
@@ -1443,76 +1317,70 @@ static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}
-
-static void gen_iwmmxt_movl_T0_T1_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V0, rn);
- tcg_gen_trunc_i64_i32(cpu_T[0], cpu_V0);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
- tcg_gen_trunc_i64_i32(cpu_T[1], cpu_V0);
-}
-
-static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
-{
- tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]);
- iwmmxt_store_reg(cpu_V0, rn);
-}
-
-static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
+static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
{
int rd;
uint32_t offset;
+ TCGv tmp;
rd = (insn >> 16) & 0xf;
- gen_movl_T1_reg(s, rd);
+ tmp = load_reg(s, rd);
offset = (insn & 0xff) << ((insn >> 7) & 2);
if (insn & (1 << 24)) {
/* Pre indexed */
if (insn & (1 << 23))
- gen_op_addl_T1_im(offset);
+ tcg_gen_addi_i32(tmp, tmp, offset);
else
- gen_op_addl_T1_im(-offset);
-
+ tcg_gen_addi_i32(tmp, tmp, -offset);
+ tcg_gen_mov_i32(dest, tmp);
if (insn & (1 << 21))
- gen_movl_reg_T1(s, rd);
+ store_reg(s, rd, tmp);
+ else
+ tcg_temp_free_i32(tmp);
} else if (insn & (1 << 21)) {
/* Post indexed */
+ tcg_gen_mov_i32(dest, tmp);
if (insn & (1 << 23))
- gen_op_movl_T0_im(offset);
+ tcg_gen_addi_i32(tmp, tmp, offset);
else
- gen_op_movl_T0_im(- offset);
- gen_op_addl_T0_T1();
- gen_movl_reg_T0(s, rd);
+ tcg_gen_addi_i32(tmp, tmp, -offset);
+ store_reg(s, rd, tmp);
} else if (!(insn & (1 << 23)))
return 1;
return 0;
}
-static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
+static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
{
int rd = (insn >> 0) & 0xf;
+ TCGv tmp;
- if (insn & (1 << 8))
- if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
+ if (insn & (1 << 8)) {
+ if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
return 1;
- else
- gen_op_iwmmxt_movl_T0_wCx(rd);
- else
- gen_iwmmxt_movl_T0_T1_wRn(rd);
-
- gen_op_movl_T1_im(mask);
- gen_op_andl_T0_T1();
+ } else {
+ tmp = iwmmxt_load_creg(rd);
+ }
+ } else {
+ tmp = tcg_temp_new_i32();
+ iwmmxt_load_reg(cpu_V0, rd);
+ tcg_gen_trunc_i64_i32(tmp, cpu_V0);
+ }
+ tcg_gen_andi_i32(tmp, tmp, mask);
+ tcg_gen_mov_i32(dest, tmp);
+ tcg_temp_free_i32(tmp);
return 0;
}
-/* Disassemble an iwMMXt instruction. Returns nonzero if an error occured
+/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
(ie. an undefined instruction). */
static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
int rd, wrd;
int rdhi, rdlo, rd0, rd1, i;
- TCGv tmp;
+ TCGv addr;
+ TCGv tmp, tmp2, tmp3;
if ((insn & 0x0e000e00) == 0x0c000000) {
if ((insn & 0x0fe00ff0) == 0x0c400000) {
@@ -1520,77 +1388,78 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
rdlo = (insn >> 12) & 0xf;
rdhi = (insn >> 16) & 0xf;
if (insn & ARM_CP_RW_BIT) { /* TMRRC */
- gen_iwmmxt_movl_T0_T1_wRn(wrd);
- gen_movl_reg_T0(s, rdlo);
- gen_movl_reg_T1(s, rdhi);
+ iwmmxt_load_reg(cpu_V0, wrd);
+ tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
+ tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
+ tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
} else { /* TMCRR */
- gen_movl_T0_reg(s, rdlo);
- gen_movl_T1_reg(s, rdhi);
- gen_iwmmxt_movl_wRn_T0_T1(wrd);
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
+ iwmmxt_store_reg(cpu_V0, wrd);
gen_op_iwmmxt_set_mup();
}
return 0;
}
wrd = (insn >> 12) & 0xf;
- if (gen_iwmmxt_address(s, insn))
+ addr = tcg_temp_new_i32();
+ if (gen_iwmmxt_address(s, insn, addr)) {
+ tcg_temp_free_i32(addr);
return 1;
+ }
if (insn & ARM_CP_RW_BIT) {
if ((insn >> 28) == 0xf) { /* WLDRW wCx */
- tmp = gen_ld32(cpu_T[1], IS_USER(s));
- tcg_gen_mov_i32(cpu_T[0], tmp);
- dead_tmp(tmp);
- gen_op_iwmmxt_movl_wCx_T0(wrd);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
+ iwmmxt_store_creg(wrd, tmp);
} else {
i = 1;
if (insn & (1 << 8)) {
if (insn & (1 << 22)) { /* WLDRD */
- tcg_gen_qemu_ld64(cpu_M0, cpu_T[1], IS_USER(s));
+ tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
i = 0;
} else { /* WLDRW wRd */
- tmp = gen_ld32(cpu_T[1], IS_USER(s));
+ tmp = gen_ld32(addr, IS_USER(s));
}
} else {
if (insn & (1 << 22)) { /* WLDRH */
- tmp = gen_ld16u(cpu_T[1], IS_USER(s));
+ tmp = gen_ld16u(addr, IS_USER(s));
} else { /* WLDRB */
- tmp = gen_ld8u(cpu_T[1], IS_USER(s));
+ tmp = gen_ld8u(addr, IS_USER(s));
}
}
if (i) {
tcg_gen_extu_i32_i64(cpu_M0, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
gen_op_iwmmxt_movq_wRn_M0(wrd);
}
} else {
if ((insn >> 28) == 0xf) { /* WSTRW wCx */
- gen_op_iwmmxt_movl_T0_wCx(wrd);
- tmp = new_tmp();
- tcg_gen_mov_i32(tmp, cpu_T[0]);
- gen_st32(tmp, cpu_T[1], IS_USER(s));
+ tmp = iwmmxt_load_creg(wrd);
+ gen_st32(tmp, addr, IS_USER(s));
} else {
gen_op_iwmmxt_movq_M0_wRn(wrd);
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
if (insn & (1 << 8)) {
if (insn & (1 << 22)) { /* WSTRD */
- dead_tmp(tmp);
- tcg_gen_qemu_st64(cpu_M0, cpu_T[1], IS_USER(s));
+ tcg_temp_free_i32(tmp);
+ tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
} else { /* WSTRW wRd */
tcg_gen_trunc_i64_i32(tmp, cpu_M0);
- gen_st32(tmp, cpu_T[1], IS_USER(s));
+ gen_st32(tmp, addr, IS_USER(s));
}
} else {
if (insn & (1 << 22)) { /* WSTRH */
tcg_gen_trunc_i64_i32(tmp, cpu_M0);
- gen_st16(tmp, cpu_T[1], IS_USER(s));
+ gen_st16(tmp, addr, IS_USER(s));
} else { /* WSTRB */
tcg_gen_trunc_i64_i32(tmp, cpu_M0);
- gen_st8(tmp, cpu_T[1], IS_USER(s));
+ gen_st8(tmp, addr, IS_USER(s));
}
}
}
}
+ tcg_temp_free_i32(addr);
return 0;
}
@@ -1622,18 +1491,19 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
gen_op_iwmmxt_set_cup();
/* Fall through. */
case ARM_IWMMXT_wCSSF:
- gen_op_iwmmxt_movl_T0_wCx(wrd);
- gen_movl_T1_reg(s, rd);
- gen_op_bicl_T0_T1();
- gen_op_iwmmxt_movl_wCx_T0(wrd);
+ tmp = iwmmxt_load_creg(wrd);
+ tmp2 = load_reg(s, rd);
+ tcg_gen_andc_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ iwmmxt_store_creg(wrd, tmp);
break;
case ARM_IWMMXT_wCGR0:
case ARM_IWMMXT_wCGR1:
case ARM_IWMMXT_wCGR2:
case ARM_IWMMXT_wCGR3:
gen_op_iwmmxt_set_cup();
- gen_movl_reg_T0(s, rd);
- gen_op_iwmmxt_movl_wCx_T0(wrd);
+ tmp = load_reg(s, rd);
+ iwmmxt_store_creg(wrd, tmp);
break;
default:
return 1;
@@ -1655,8 +1525,8 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
return 1;
rd = (insn >> 12) & 0xf;
wrd = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movl_T0_wCx(wrd);
- gen_movl_reg_T0(s, rd);
+ tmp = iwmmxt_load_creg(wrd);
+ store_reg(s, rd, tmp);
break;
case 0x300: /* WANDN */
wrd = (insn >> 12) & 0xf;
@@ -1833,131 +1703,145 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
rd0 = (insn >> 16) & 0xf;
rd1 = (insn >> 0) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
- gen_op_movl_T1_im(7);
- gen_op_andl_T0_T1();
- gen_op_iwmmxt_align_M0_T0_wRn(rd1);
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
+ tcg_gen_andi_i32(tmp, tmp, 7);
+ iwmmxt_load_reg(cpu_V1, rd1);
+ gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
break;
case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
+ if (((insn >> 6) & 3) == 3)
+ return 1;
rd = (insn >> 12) & 0xf;
wrd = (insn >> 16) & 0xf;
- gen_movl_T0_reg(s, rd);
+ tmp = load_reg(s, rd);
gen_op_iwmmxt_movq_M0_wRn(wrd);
switch ((insn >> 6) & 3) {
case 0:
- gen_op_movl_T1_im(0xff);
- gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
+ tmp2 = tcg_const_i32(0xff);
+ tmp3 = tcg_const_i32((insn & 7) << 3);
break;
case 1:
- gen_op_movl_T1_im(0xffff);
- gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
+ tmp2 = tcg_const_i32(0xffff);
+ tmp3 = tcg_const_i32((insn & 3) << 4);
break;
case 2:
- gen_op_movl_T1_im(0xffffffff);
- gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
+ tmp2 = tcg_const_i32(0xffffffff);
+ tmp3 = tcg_const_i32((insn & 1) << 5);
break;
- case 3:
- return 1;
+ default:
+ TCGV_UNUSED(tmp2);
+ TCGV_UNUSED(tmp3);
}
+ gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
+ tcg_temp_free(tmp3);
+ tcg_temp_free(tmp2);
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
break;
case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
rd = (insn >> 12) & 0xf;
wrd = (insn >> 16) & 0xf;
- if (rd == 15)
+ if (rd == 15 || ((insn >> 22) & 3) == 3)
return 1;
gen_op_iwmmxt_movq_M0_wRn(wrd);
+ tmp = tcg_temp_new_i32();
switch ((insn >> 22) & 3) {
case 0:
- if (insn & 8)
- gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
- else {
- gen_op_iwmmxt_extru_T0_M0((insn & 7) << 3, 0xff);
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
+ tcg_gen_trunc_i64_i32(tmp, cpu_M0);
+ if (insn & 8) {
+ tcg_gen_ext8s_i32(tmp, tmp);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, 0xff);
}
break;
case 1:
- if (insn & 8)
- gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
- else {
- gen_op_iwmmxt_extru_T0_M0((insn & 3) << 4, 0xffff);
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
+ tcg_gen_trunc_i64_i32(tmp, cpu_M0);
+ if (insn & 8) {
+ tcg_gen_ext16s_i32(tmp, tmp);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
}
break;
case 2:
- gen_op_iwmmxt_extru_T0_M0((insn & 1) << 5, ~0u);
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
+ tcg_gen_trunc_i64_i32(tmp, cpu_M0);
break;
- case 3:
- return 1;
}
- gen_movl_reg_T0(s, rd);
+ store_reg(s, rd, tmp);
break;
case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
- if ((insn & 0x000ff008) != 0x0003f000)
+ if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
return 1;
- gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
switch ((insn >> 22) & 3) {
case 0:
- gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
break;
case 1:
- gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
break;
case 2:
- gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
break;
- case 3:
- return 1;
}
- gen_op_shll_T1_im(28);
- gen_set_nzcv(cpu_T[1]);
+ tcg_gen_shli_i32(tmp, tmp, 28);
+ gen_set_nzcv(tmp);
+ tcg_temp_free_i32(tmp);
break;
case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
+ if (((insn >> 6) & 3) == 3)
+ return 1;
rd = (insn >> 12) & 0xf;
wrd = (insn >> 16) & 0xf;
- gen_movl_T0_reg(s, rd);
+ tmp = load_reg(s, rd);
switch ((insn >> 6) & 3) {
case 0:
- gen_helper_iwmmxt_bcstb(cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
break;
case 1:
- gen_helper_iwmmxt_bcstw(cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
break;
case 2:
- gen_helper_iwmmxt_bcstl(cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
break;
- case 3:
- return 1;
}
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
break;
case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
- if ((insn & 0x000ff00f) != 0x0003f000)
+ if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
return 1;
- gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp2, tmp);
switch ((insn >> 22) & 3) {
case 0:
for (i = 0; i < 7; i ++) {
- gen_op_shll_T1_im(4);
- gen_op_andl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 4);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
}
break;
case 1:
for (i = 0; i < 3; i ++) {
- gen_op_shll_T1_im(8);
- gen_op_andl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 8);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
}
break;
case 2:
- gen_op_shll_T1_im(16);
- gen_op_andl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
break;
- case 3:
- return 1;
}
- gen_set_nzcv(cpu_T[0]);
+ gen_set_nzcv(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
break;
case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
wrd = (insn >> 12) & 0xf;
@@ -1980,51 +1864,52 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
gen_op_iwmmxt_set_mup();
break;
case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
- if ((insn & 0x000ff00f) != 0x0003f000)
+ if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
return 1;
- gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp2, tmp);
switch ((insn >> 22) & 3) {
case 0:
for (i = 0; i < 7; i ++) {
- gen_op_shll_T1_im(4);
- gen_op_orl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 4);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
}
break;
case 1:
for (i = 0; i < 3; i ++) {
- gen_op_shll_T1_im(8);
- gen_op_orl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 8);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
}
break;
case 2:
- gen_op_shll_T1_im(16);
- gen_op_orl_T0_T1();
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
break;
- case 3:
- return 1;
}
- gen_set_nzcv(cpu_T[0]);
+ gen_set_nzcv(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
break;
case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
rd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
- if ((insn & 0xf) != 0)
+ if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
return 1;
gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
switch ((insn >> 22) & 3) {
case 0:
- gen_helper_iwmmxt_msbb(cpu_T[0], cpu_M0);
+ gen_helper_iwmmxt_msbb(tmp, cpu_M0);
break;
case 1:
- gen_helper_iwmmxt_msbw(cpu_T[0], cpu_M0);
+ gen_helper_iwmmxt_msbw(tmp, cpu_M0);
break;
case 2:
- gen_helper_iwmmxt_msbl(cpu_T[0], cpu_M0);
+ gen_helper_iwmmxt_msbl(tmp, cpu_M0);
break;
- case 3:
- return 1;
}
- gen_movl_reg_T0(s, rd);
+ store_reg(s, rd, tmp);
break;
case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
case 0x906: case 0xb06: case 0xd06: case 0xf06:
@@ -2122,100 +2007,120 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
break;
case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
case 0x214: case 0x614: case 0xa14: case 0xe14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (gen_iwmmxt_shift(insn, 0xff))
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
+ }
switch ((insn >> 22) & 3) {
- case 0:
- return 1;
case 1:
- gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_srlw(cpu_M0, cpu_M0, tmp);
break;
case 2:
- gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_srll(cpu_M0, cpu_M0, tmp);
break;
case 3:
- gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_srlq(cpu_M0, cpu_M0, tmp);
break;
}
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
gen_op_iwmmxt_set_cup();
break;
case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
case 0x014: case 0x414: case 0x814: case 0xc14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (gen_iwmmxt_shift(insn, 0xff))
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
+ }
switch ((insn >> 22) & 3) {
- case 0:
- return 1;
case 1:
- gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_sraw(cpu_M0, cpu_M0, tmp);
break;
case 2:
- gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_sral(cpu_M0, cpu_M0, tmp);
break;
case 3:
- gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_sraq(cpu_M0, cpu_M0, tmp);
break;
}
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
gen_op_iwmmxt_set_cup();
break;
case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
case 0x114: case 0x514: case 0x914: case 0xd14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (gen_iwmmxt_shift(insn, 0xff))
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
+ }
switch ((insn >> 22) & 3) {
- case 0:
- return 1;
case 1:
- gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_sllw(cpu_M0, cpu_M0, tmp);
break;
case 2:
- gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_slll(cpu_M0, cpu_M0, tmp);
break;
case 3:
- gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ gen_helper_iwmmxt_sllq(cpu_M0, cpu_M0, tmp);
break;
}
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
gen_op_iwmmxt_set_cup();
break;
case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
case 0x314: case 0x714: case 0xb14: case 0xf14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
switch ((insn >> 22) & 3) {
- case 0:
- return 1;
case 1:
- if (gen_iwmmxt_shift(insn, 0xf))
+ if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
- gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ }
+ gen_helper_iwmmxt_rorw(cpu_M0, cpu_M0, tmp);
break;
case 2:
- if (gen_iwmmxt_shift(insn, 0x1f))
+ if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
- gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ }
+ gen_helper_iwmmxt_rorl(cpu_M0, cpu_M0, tmp);
break;
case 3:
- if (gen_iwmmxt_shift(insn, 0x3f))
+ if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
+ tcg_temp_free_i32(tmp);
return 1;
- gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ }
+ gen_helper_iwmmxt_rorq(cpu_M0, cpu_M0, tmp);
break;
}
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
gen_op_iwmmxt_set_cup();
@@ -2288,8 +2193,10 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
rd0 = (insn >> 16) & 0xf;
rd1 = (insn >> 0) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_movl_T0_im((insn >> 20) & 3);
- gen_op_iwmmxt_align_M0_T0_wRn(rd1);
+ tmp = tcg_const_i32((insn >> 20) & 3);
+ iwmmxt_load_reg(cpu_V1, rd1);
+ gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
+ tcg_temp_free(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
break;
@@ -2343,8 +2250,9 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
- gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
+ tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
+ gen_helper_iwmmxt_shufh(cpu_M0, cpu_M0, tmp);
+ tcg_temp_free(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
gen_op_iwmmxt_set_cup();
@@ -2396,15 +2304,13 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
case 0x408: case 0x508: case 0x608: case 0x708:
case 0x808: case 0x908: case 0xa08: case 0xb08:
case 0xc08: case 0xd08: case 0xe08: case 0xf08:
+ if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
+ return 1;
wrd = (insn >> 12) & 0xf;
rd0 = (insn >> 16) & 0xf;
rd1 = (insn >> 0) & 0xf;
gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (!(insn & (1 << 20)))
- return 1;
switch ((insn >> 22) & 3) {
- case 0:
- return 1;
case 1:
if (insn & (1 << 21))
gen_op_iwmmxt_packsw_M0_wRn(rd1);
@@ -2438,30 +2344,29 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
if (rd0 == 0xf || rd1 == 0xf)
return 1;
gen_op_iwmmxt_movq_M0_wRn(wrd);
+ tmp = load_reg(s, rd0);
+ tmp2 = load_reg(s, rd1);
switch ((insn >> 16) & 0xf) {
case 0x0: /* TMIA */
- gen_movl_T0_reg(s, rd0);
- gen_movl_T1_reg(s, rd1);
- gen_op_iwmmxt_muladdsl_M0_T0_T1();
+ gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
break;
case 0x8: /* TMIAPH */
- gen_movl_T0_reg(s, rd0);
- gen_movl_T1_reg(s, rd1);
- gen_op_iwmmxt_muladdsw_M0_T0_T1();
+ gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
break;
case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
- gen_movl_T1_reg(s, rd0);
if (insn & (1 << 16))
- gen_op_shrl_T1_im(16);
- gen_op_movl_T0_T1();
- gen_movl_T1_reg(s, rd1);
+ tcg_gen_shri_i32(tmp, tmp, 16);
if (insn & (1 << 17))
- gen_op_shrl_T1_im(16);
- gen_op_iwmmxt_muladdswl_M0_T0_T1();
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
break;
default:
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
return 1;
}
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(wrd);
gen_op_iwmmxt_set_mup();
break;
@@ -2472,11 +2377,12 @@ static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
return 0;
}
-/* Disassemble an XScale DSP instruction. Returns nonzero if an error occured
+/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
(ie. an undefined instruction). */
static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
int acc, rd0, rd1, rdhi, rdlo;
+ TCGv tmp, tmp2;
if ((insn & 0x0ff00f10) == 0x0e200010) {
/* Multiply with Internal Accumulate Format */
@@ -2487,33 +2393,30 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
if (acc != 0)
return 1;
+ tmp = load_reg(s, rd0);
+ tmp2 = load_reg(s, rd1);
switch ((insn >> 16) & 0xf) {
case 0x0: /* MIA */
- gen_movl_T0_reg(s, rd0);
- gen_movl_T1_reg(s, rd1);
- gen_op_iwmmxt_muladdsl_M0_T0_T1();
+ gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
break;
case 0x8: /* MIAPH */
- gen_movl_T0_reg(s, rd0);
- gen_movl_T1_reg(s, rd1);
- gen_op_iwmmxt_muladdsw_M0_T0_T1();
+ gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
break;
case 0xc: /* MIABB */
case 0xd: /* MIABT */
case 0xe: /* MIATB */
case 0xf: /* MIATT */
- gen_movl_T1_reg(s, rd0);
if (insn & (1 << 16))
- gen_op_shrl_T1_im(16);
- gen_op_movl_T0_T1();
- gen_movl_T1_reg(s, rd1);
+ tcg_gen_shri_i32(tmp, tmp, 16);
if (insn & (1 << 17))
- gen_op_shrl_T1_im(16);
- gen_op_iwmmxt_muladdswl_M0_T0_T1();
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
break;
default:
return 1;
}
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
gen_op_iwmmxt_movq_wRn_M0(acc);
return 0;
@@ -2529,15 +2432,14 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
return 1;
if (insn & ARM_CP_RW_BIT) { /* MRA */
- gen_iwmmxt_movl_T0_T1_wRn(acc);
- gen_movl_reg_T0(s, rdlo);
- gen_op_movl_T0_im((1 << (40 - 32)) - 1);
- gen_op_andl_T0_T1();
- gen_movl_reg_T0(s, rdhi);
+ iwmmxt_load_reg(cpu_V0, acc);
+ tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
+ tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
+ tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
+ tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
} else { /* MAR */
- gen_movl_T0_reg(s, rdlo);
- gen_movl_T1_reg(s, rdhi);
- gen_iwmmxt_movl_wRn_T0_T1(acc);
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
+ iwmmxt_store_reg(cpu_V0, acc);
}
return 0;
}
@@ -2549,27 +2451,28 @@ static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
instruction is not defined. */
static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
- TCGv tmp;
+ TCGv tmp, tmp2;
uint32_t rd = (insn >> 12) & 0xf;
uint32_t cp = (insn >> 8) & 0xf;
- if (IS_USER(s)) {
- return 1;
- }
if (insn & ARM_CP_RW_BIT) {
if (!env->cp[cp].cp_read)
return 1;
gen_set_pc_im(s->pc);
- tmp = new_tmp();
- gen_helper_get_cp(tmp, cpu_env, tcg_const_i32(insn));
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_const_i32(insn);
+ gen_helper_get_cp(tmp, cpu_env, tmp2);
+ tcg_temp_free(tmp2);
store_reg(s, rd, tmp);
} else {
if (!env->cp[cp].cp_write)
return 1;
gen_set_pc_im(s->pc);
tmp = load_reg(s, rd);
- gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp);
- dead_tmp(tmp);
+ tmp2 = tcg_const_i32(insn);
+ gen_helper_set_cp(cpu_env, tmp2, tmp);
+ tcg_temp_free(tmp2);
+ tcg_temp_free_i32(tmp);
}
return 0;
}
@@ -2594,12 +2497,61 @@ static int cp15_user_ok(uint32_t insn)
return 0;
}
+static int cp15_tls_load_store(CPUState *env, DisasContext *s, uint32_t insn, uint32_t rd)
+{
+ TCGv tmp;
+ int cpn = (insn >> 16) & 0xf;
+ int cpm = insn & 0xf;
+ int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
+
+ if (!arm_feature(env, ARM_FEATURE_V6K))
+ return 0;
+
+ if (!(cpn == 13 && cpm == 0))
+ return 0;
+
+ if (insn & ARM_CP_RW_BIT) {
+ switch (op) {
+ case 2:
+ tmp = load_cpu_field(cp15.c13_tls1);
+ break;
+ case 3:
+ tmp = load_cpu_field(cp15.c13_tls2);
+ break;
+ case 4:
+ tmp = load_cpu_field(cp15.c13_tls3);
+ break;
+ default:
+ return 0;
+ }
+ store_reg(s, rd, tmp);
+
+ } else {
+ tmp = load_reg(s, rd);
+ switch (op) {
+ case 2:
+ store_cpu_field(tmp, cp15.c13_tls1);
+ break;
+ case 3:
+ store_cpu_field(tmp, cp15.c13_tls2);
+ break;
+ case 4:
+ store_cpu_field(tmp, cp15.c13_tls3);
+ break;
+ default:
+ tcg_temp_free_i32(tmp);
+ return 0;
+ }
+ }
+ return 1;
+}
+
/* Disassemble system coprocessor (cp15) instruction. Return nonzero if
instruction is not defined. */
static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
{
uint32_t rd;
- TCGv tmp;
+ TCGv tmp, tmp2;
/* M profile cores use memory mapped registers instead of cp15. */
if (arm_feature(env, ARM_FEATURE_M))
@@ -2620,26 +2572,56 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
if (IS_USER(s) && !cp15_user_ok(insn)) {
return 1;
}
- if ((insn & 0x0fff0fff) == 0x0e070f90
- || (insn & 0x0fff0fff) == 0x0e070f58) {
- /* Wait for interrupt. */
- gen_set_pc_im(s->pc);
- s->is_jmp = DISAS_WFI;
+
+ /* Pre-v7 versions of the architecture implemented WFI via coprocessor
+ * instructions rather than a separate instruction.
+ */
+ if ((insn & 0x0fff0fff) == 0x0e070f90) {
+ /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores).
+ * In v7, this must NOP.
+ */
+ if (!arm_feature(env, ARM_FEATURE_V7)) {
+ /* Wait for interrupt. */
+ gen_set_pc_im(s->pc);
+ s->is_jmp = DISAS_WFI;
+ }
return 0;
}
+
+ if ((insn & 0x0fff0fff) == 0x0e070f58) {
+ /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI,
+ * so this is slightly over-broad.
+ */
+ if (!arm_feature(env, ARM_FEATURE_V6)) {
+ /* Wait for interrupt. */
+ gen_set_pc_im(s->pc);
+ s->is_jmp = DISAS_WFI;
+ return 0;
+ }
+ /* Otherwise fall through to handle via helper function.
+ * In particular, on v7 and some v6 cores this is one of
+ * the VA-PA registers.
+ */
+ }
+
rd = (insn >> 12) & 0xf;
+
+ if (cp15_tls_load_store(env, s, insn, rd))
+ return 0;
+
+ tmp2 = tcg_const_i32(insn);
if (insn & ARM_CP_RW_BIT) {
- tmp = new_tmp();
- gen_helper_get_cp15(tmp, cpu_env, tcg_const_i32(insn));
+ tmp = tcg_temp_new_i32();
+ gen_helper_get_cp15(tmp, cpu_env, tmp2);
/* If the destination register is r15 then sets condition codes. */
if (rd != 15)
store_reg(s, rd, tmp);
else
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
} else {
tmp = load_reg(s, rd);
- gen_helper_set_cp15(cpu_env, tcg_const_i32(insn), tmp);
- dead_tmp(tmp);
+ gen_helper_set_cp15(cpu_env, tmp2, tmp);
+ tcg_temp_free_i32(tmp);
/* Normally we would always end the TB here, but Linux
* arch/arm/mach-pxa/sleep.S expects two instructions following
* an MMU enable to execute from cache. Imitate this behaviour. */
@@ -2647,6 +2629,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
(insn & 0x0fff0fff) != 0x0e010f10)
gen_lookup_tb(s);
}
+ tcg_temp_free_i32(tmp2);
return 0;
}
@@ -2673,7 +2656,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
/* Move between integer and VFP cores. */
static TCGv gen_vfp_mrs(void)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_mov_i32(tmp, cpu_F0s);
return tmp;
}
@@ -2681,18 +2664,12 @@ static TCGv gen_vfp_mrs(void)
static void gen_vfp_msr(TCGv tmp)
{
tcg_gen_mov_i32(cpu_F0s, tmp);
- dead_tmp(tmp);
-}
-
-static inline int
-vfp_enabled(CPUState * env)
-{
- return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
+ tcg_temp_free_i32(tmp);
}
static void gen_neon_dup_u8(TCGv var, int shift)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
if (shift)
tcg_gen_shri_i32(var, var, shift);
tcg_gen_ext8u_i32(var, var);
@@ -2700,40 +2677,63 @@ static void gen_neon_dup_u8(TCGv var, int shift)
tcg_gen_or_i32(var, var, tmp);
tcg_gen_shli_i32(tmp, var, 16);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
static void gen_neon_dup_low16(TCGv var)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_ext16u_i32(var, var);
tcg_gen_shli_i32(tmp, var, 16);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
static void gen_neon_dup_high16(TCGv var)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_andi_i32(var, var, 0xffff0000);
tcg_gen_shri_i32(tmp, var, 16);
tcg_gen_or_i32(var, var, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
}
-/* Disassemble a VFP instruction. Returns nonzero if an error occured
+static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
+{
+ /* Load a single Neon element and replicate into a 32 bit TCG reg */
+ TCGv tmp;
+ switch (size) {
+ case 0:
+ tmp = gen_ld8u(addr, IS_USER(s));
+ gen_neon_dup_u8(tmp, 0);
+ break;
+ case 1:
+ tmp = gen_ld16u(addr, IS_USER(s));
+ gen_neon_dup_low16(tmp);
+ break;
+ case 2:
+ tmp = gen_ld32(addr, IS_USER(s));
+ break;
+ default: /* Avoid compiler warnings. */
+ abort();
+ }
+ return tmp;
+}
+
+/* Disassemble a VFP instruction. Returns nonzero if an error occurred
(ie. an undefined instruction). */
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
int dp, veclen;
+ TCGv addr;
TCGv tmp;
TCGv tmp2;
if (!arm_feature(env, ARM_FEATURE_VFP))
return 1;
- if (!vfp_enabled(env)) {
+ if (!s->vfp_enabled) {
/* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
if ((insn & 0x0fe00fff) != 0x0ee00a10)
return 1;
@@ -2812,7 +2812,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
gen_neon_dup_low16(tmp);
}
for (n = 0; n <= pass * 2; n++) {
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_mov_i32(tmp2, tmp);
neon_store_reg(rn, n, tmp2);
}
@@ -2823,12 +2823,12 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 0:
tmp2 = neon_load_reg(rn, pass);
gen_bfi(tmp, tmp2, tmp, offset, 0xff);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
break;
case 1:
tmp2 = neon_load_reg(rn, pass);
gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
break;
case 2:
break;
@@ -2874,7 +2874,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
} else {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
gen_helper_vfp_get_fpscr(tmp, cpu_env);
}
break;
@@ -2895,7 +2895,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (rd == 15) {
/* Set the 4 flag bits in the CPSR. */
gen_set_nzcv(tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
} else {
store_reg(s, rd, tmp);
}
@@ -2913,12 +2913,15 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
break;
case ARM_VFP_FPSCR:
gen_helper_vfp_set_fpscr(cpu_env, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
gen_lookup_tb(s);
break;
case ARM_VFP_FPEXC:
if (IS_USER(s))
return 1;
+ /* TODO: VFP subarchitecture support.
+ * For now, keep the EN bit only */
+ tcg_gen_andi_i32(tmp, tmp, 1 << 30);
store_cpu_field(tmp, vfp.xregs[rn]);
gen_lookup_tb(s);
break;
@@ -2948,16 +2951,18 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
VFP_DREG_N(rn, insn);
}
- if (op == 15 && (rn == 15 || rn > 17)) {
+ if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
/* Integer or single precision destination. */
rd = VFP_SREG_D(insn);
} else {
VFP_DREG_D(rd, insn);
}
-
- if (op == 15 && (rn == 16 || rn == 17)) {
- /* Integer source. */
- rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+ if (op == 15 &&
+ (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
+ /* VCVT from int is always from S reg regardless of dp bit.
+ * VCVT with immediate frac_bits has same format as SREG_M
+ */
+ rm = VFP_SREG_M(insn);
} else {
VFP_DREG_M(rm, insn);
}
@@ -2969,10 +2974,13 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
} else {
rd = VFP_SREG_D(insn);
}
+ /* NB that we implicitly rely on the encoding for the frac_bits
+ * in VCVT of fixed to float being the same as that of an SREG_M
+ */
rm = VFP_SREG_M(insn);
}
- veclen = env->vfp.vec_len;
+ veclen = s->vec_len;
if (op == 15 && rn > 3)
veclen = 0;
@@ -2993,9 +3001,9 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
veclen = 0;
} else {
if (dp)
- delta_d = (env->vfp.vec_stride >> 1) + 1;
+ delta_d = (s->vec_stride >> 1) + 1;
else
- delta_d = env->vfp.vec_stride + 1;
+ delta_d = s->vec_stride + 1;
if ((rm & bank_mask) == 0) {
/* mixed scalar/vector */
@@ -3052,27 +3060,34 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
for (;;) {
/* Perform the calculation. */
switch (op) {
- case 0: /* mac: fd + (fn * fm) */
- gen_vfp_mul(dp);
- gen_mov_F1_vreg(dp, rd);
+ case 0: /* VMLA: fd + (fn * fm) */
+ /* Note that order of inputs to the add matters for NaNs */
+ gen_vfp_F1_mul(dp);
+ gen_mov_F0_vreg(dp, rd);
gen_vfp_add(dp);
break;
- case 1: /* nmac: fd - (fn * fm) */
+ case 1: /* VMLS: fd + -(fn * fm) */
gen_vfp_mul(dp);
- gen_vfp_neg(dp);
- gen_mov_F1_vreg(dp, rd);
+ gen_vfp_F1_neg(dp);
+ gen_mov_F0_vreg(dp, rd);
gen_vfp_add(dp);
break;
- case 2: /* msc: -fd + (fn * fm) */
- gen_vfp_mul(dp);
- gen_mov_F1_vreg(dp, rd);
- gen_vfp_sub(dp);
+ case 2: /* VNMLS: -fd + (fn * fm) */
+ /* Note that it isn't valid to replace (-A + B) with (B - A)
+ * or similar plausible looking simplifications
+ * because this will give wrong results for NaNs.
+ */
+ gen_vfp_F1_mul(dp);
+ gen_mov_F0_vreg(dp, rd);
+ gen_vfp_neg(dp);
+ gen_vfp_add(dp);
break;
- case 3: /* nmsc: -fd - (fn * fm) */
+ case 3: /* VNMLA: -fd + -(fn * fm) */
gen_vfp_mul(dp);
+ gen_vfp_F1_neg(dp);
+ gen_mov_F0_vreg(dp, rd);
gen_vfp_neg(dp);
- gen_mov_F1_vreg(dp, rd);
- gen_vfp_sub(dp);
+ gen_vfp_add(dp);
break;
case 4: /* mul: fn * fm */
gen_vfp_mul(dp);
@@ -3126,6 +3141,47 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 3: /* sqrt */
gen_vfp_sqrt(dp);
break;
+ case 4: /* vcvtb.f32.f16 */
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+ return 1;
+ tmp = gen_vfp_mrs();
+ tcg_gen_ext16u_i32(tmp, tmp);
+ gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+ tcg_temp_free_i32(tmp);
+ break;
+ case 5: /* vcvtt.f32.f16 */
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+ return 1;
+ tmp = gen_vfp_mrs();
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
+ tcg_temp_free_i32(tmp);
+ break;
+ case 6: /* vcvtb.f16.f32 */
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+ return 1;
+ tmp = tcg_temp_new_i32();
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ gen_mov_F0_vreg(0, rd);
+ tmp2 = gen_vfp_mrs();
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ gen_vfp_msr(tmp);
+ break;
+ case 7: /* vcvtt.f16.f32 */
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16))
+ return 1;
+ tmp = tcg_temp_new_i32();
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ tcg_gen_shli_i32(tmp, tmp, 16);
+ gen_mov_F0_vreg(0, rd);
+ tmp2 = gen_vfp_mrs();
+ tcg_gen_ext16u_i32(tmp2, tmp2);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ gen_vfp_msr(tmp);
+ break;
case 8: /* cmp */
gen_vfp_cmp(dp);
break;
@@ -3146,62 +3202,62 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
break;
case 16: /* fuito */
- gen_vfp_uito(dp);
+ gen_vfp_uito(dp, 0);
break;
case 17: /* fsito */
- gen_vfp_sito(dp);
+ gen_vfp_sito(dp, 0);
break;
case 20: /* fshto */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_shto(dp, 16 - rm);
+ gen_vfp_shto(dp, 16 - rm, 0);
break;
case 21: /* fslto */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_slto(dp, 32 - rm);
+ gen_vfp_slto(dp, 32 - rm, 0);
break;
case 22: /* fuhto */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_uhto(dp, 16 - rm);
+ gen_vfp_uhto(dp, 16 - rm, 0);
break;
case 23: /* fulto */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_ulto(dp, 32 - rm);
+ gen_vfp_ulto(dp, 32 - rm, 0);
break;
case 24: /* ftoui */
- gen_vfp_toui(dp);
+ gen_vfp_toui(dp, 0);
break;
case 25: /* ftouiz */
- gen_vfp_touiz(dp);
+ gen_vfp_touiz(dp, 0);
break;
case 26: /* ftosi */
- gen_vfp_tosi(dp);
+ gen_vfp_tosi(dp, 0);
break;
case 27: /* ftosiz */
- gen_vfp_tosiz(dp);
+ gen_vfp_tosiz(dp, 0);
break;
case 28: /* ftosh */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_tosh(dp, 16 - rm);
+ gen_vfp_tosh(dp, 16 - rm, 0);
break;
case 29: /* ftosl */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_tosl(dp, 32 - rm);
+ gen_vfp_tosl(dp, 32 - rm, 0);
break;
case 30: /* ftouh */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_touh(dp, 16 - rm);
+ gen_vfp_touh(dp, 16 - rm, 0);
break;
case 31: /* ftoul */
if (!arm_feature(env, ARM_FEATURE_VFP3))
return 1;
- gen_vfp_toul(dp, 32 - rm);
+ gen_vfp_toul(dp, 32 - rm, 0);
break;
default: /* undefined */
printf ("rn:%d\n", rn);
@@ -3216,8 +3272,8 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
/* Write back the result. */
if (op == 15 && (rn >= 8 && rn <= 11))
; /* Comparison, do nothing. */
- else if (op == 15 && rn > 17)
- /* Integer result. */
+ else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
+ /* VCVT double to int: always integer result. */
gen_mov_vreg_F0(0, rd);
else if (op == 15 && rn == 15)
/* conversion */
@@ -3264,7 +3320,7 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
break;
case 0xc:
case 0xd:
- if (dp && (insn & 0x03e00000) == 0x00400000) {
+ if ((insn & 0x03e00000) == 0x00400000) {
/* two-register transfer */
rn = (insn >> 16) & 0xf;
rd = (insn >> 12) & 0xf;
@@ -3286,10 +3342,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
} else {
gen_mov_F0_vreg(0, rm);
tmp = gen_vfp_mrs();
- store_reg(s, rn, tmp);
+ store_reg(s, rd, tmp);
gen_mov_F0_vreg(0, rm + 1);
tmp = gen_vfp_mrs();
- store_reg(s, rd, tmp);
+ store_reg(s, rn, tmp);
}
} else {
/* arm->vfp */
@@ -3301,10 +3357,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
gen_vfp_msr(tmp);
gen_mov_vreg_F0(0, rm * 2 + 1);
} else {
- tmp = load_reg(s, rn);
+ tmp = load_reg(s, rd);
gen_vfp_msr(tmp);
gen_mov_vreg_F0(0, rm);
- tmp = load_reg(s, rd);
+ tmp = load_reg(s, rn);
gen_vfp_msr(tmp);
gen_mov_vreg_F0(0, rm + 1);
}
@@ -3317,23 +3373,25 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
else
rd = VFP_SREG_D(insn);
if (s->thumb && rn == 15) {
- gen_op_movl_T1_im(s->pc & ~2);
+ addr = tcg_temp_new_i32();
+ tcg_gen_movi_i32(addr, s->pc & ~2);
} else {
- gen_movl_T1_reg(s, rn);
+ addr = load_reg(s, rn);
}
if ((insn & 0x01200000) == 0x01000000) {
/* Single load/store */
offset = (insn & 0xff) << 2;
if ((insn & (1 << 23)) == 0)
offset = -offset;
- gen_op_addl_T1_im(offset);
+ tcg_gen_addi_i32(addr, addr, offset);
if (insn & (1 << 20)) {
- gen_vfp_ld(s, dp);
+ gen_vfp_ld(s, dp, addr);
gen_mov_vreg_F0(dp, rd);
} else {
gen_mov_F0_vreg(dp, rd);
- gen_vfp_st(s, dp);
+ gen_vfp_st(s, dp, addr);
}
+ tcg_temp_free_i32(addr);
} else {
/* load/store multiple */
if (dp)
@@ -3342,24 +3400,26 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
n = insn & 0xff;
if (insn & (1 << 24)) /* pre-decrement */
- gen_op_addl_T1_im(-((insn & 0xff) << 2));
+ tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
if (dp)
offset = 8;
else
offset = 4;
+ tmp = tcg_const_i32(offset);
for (i = 0; i < n; i++) {
if (insn & ARM_CP_RW_BIT) {
/* load */
- gen_vfp_ld(s, dp);
+ gen_vfp_ld(s, dp, addr);
gen_mov_vreg_F0(dp, rd + i);
} else {
/* store */
gen_mov_F0_vreg(dp, rd + i);
- gen_vfp_st(s, dp);
+ gen_vfp_st(s, dp, addr);
}
- gen_op_addl_T1_im(offset);
+ tcg_gen_add_i32(addr, addr, tmp);
}
+ tcg_temp_free_i32(tmp);
if (insn & (1 << 21)) {
/* writeback */
if (insn & (1 << 24))
@@ -3370,8 +3430,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
offset = 0;
if (offset != 0)
- gen_op_addl_T1_im(offset);
- gen_movl_reg_T1(s, rn);
+ tcg_gen_addi_i32(addr, addr, offset);
+ store_reg(s, rn, addr);
+ } else {
+ tcg_temp_free_i32(addr);
}
}
}
@@ -3391,7 +3453,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
tcg_gen_goto_tb(n);
gen_set_pc_im(dest);
- tcg_gen_exit_tb((long)tb + n);
+ tcg_gen_exit_tb((tcg_target_long)tb + n);
} else {
gen_set_pc_im(dest);
tcg_gen_exit_tb(0);
@@ -3440,6 +3502,10 @@ static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
/* Mask out undefined bits. */
mask &= ~CPSR_RESERVED;
+ if (!arm_feature(env, ARM_FEATURE_V4T))
+ mask &= ~CPSR_T;
+ if (!arm_feature(env, ARM_FEATURE_V5))
+ mask &= ~CPSR_Q; /* V5TE in reality */
if (!arm_feature(env, ARM_FEATURE_V6))
mask &= ~(CPSR_E | CPSR_GE);
if (!arm_feature(env, ARM_FEATURE_THUMB2))
@@ -3453,8 +3519,8 @@ static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
return mask;
}
-/* Returns nonzero if access to the PSR is not permitted. */
-static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
+/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
+static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
{
TCGv tmp;
if (spsr) {
@@ -3464,16 +3530,26 @@ static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
tmp = load_cpu_field(spsr);
tcg_gen_andi_i32(tmp, tmp, ~mask);
- tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
- tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
+ tcg_gen_andi_i32(t0, t0, mask);
+ tcg_gen_or_i32(tmp, tmp, t0);
store_cpu_field(tmp, spsr);
} else {
- gen_set_cpsr(cpu_T[0], mask);
+ gen_set_cpsr(t0, mask);
}
+ tcg_temp_free_i32(t0);
gen_lookup_tb(s);
return 0;
}
+/* Returns nonzero if access to the PSR is not permitted. */
+static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
+{
+ TCGv tmp;
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, val);
+ return gen_set_psr(s, mask, spsr, tmp);
+}
+
/* Generate an old-style exception return. Marks pc as dead. */
static void gen_exception_return(DisasContext *s, TCGv pc)
{
@@ -3481,7 +3557,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc)
store_reg(s, 15, pc);
tmp = load_cpu_field(spsr);
gen_set_cpsr(tmp, 0xffffffff);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
s->is_jmp = DISAS_UPDATE;
}
@@ -3489,7 +3565,7 @@ static void gen_exception_return(DisasContext *s, TCGv pc)
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
{
gen_set_cpsr(cpsr, 0xffffffff);
- dead_tmp(cpsr);
+ tcg_temp_free_i32(cpsr);
store_reg(s, 15, pc);
s->is_jmp = DISAS_UPDATE;
}
@@ -3499,15 +3575,18 @@ gen_set_condexec (DisasContext *s)
{
if (s->condexec_mask) {
uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, val);
store_cpu_field(tmp, condexec_bits);
}
- else if (s->condexec_mask_prev != 0) {
- TCGv tmp = new_tmp();
- tcg_gen_movi_i32(tmp, 0);
- store_cpu_field(tmp, condexec_bits);
- }
+}
+
+static void gen_exception_insn(DisasContext *s, int offset, int excp)
+{
+ gen_set_condexec(s);
+ gen_set_pc_im(s->pc - offset);
+ gen_exception(excp);
+ s->is_jmp = DISAS_JUMP;
}
static void gen_nop_hint(DisasContext *s, int val)
@@ -3525,31 +3604,24 @@ static void gen_nop_hint(DisasContext *s, int val)
}
}
-/* These macros help make the code more readable when migrating from the
- old dyngen helpers. They should probably be removed when
- T0/T1 are removed. */
-#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]
-#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]
-
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-static inline int gen_neon_add(int size)
+static inline void gen_neon_add(int size, TCGv t0, TCGv t1)
{
switch (size) {
- case 0: gen_helper_neon_add_u8(CPU_T001); break;
- case 1: gen_helper_neon_add_u16(CPU_T001); break;
- case 2: gen_op_addl_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
+ case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
+ case 2: tcg_gen_add_i32(t0, t0, t1); break;
+ default: abort();
}
- return 0;
}
-static inline void gen_neon_rsb(int size)
+static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
{
switch (size) {
- case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;
- case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;
- case 2: gen_op_rsbl_T0_T1(); break;
+ case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
+ case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
+ case 2: tcg_gen_sub_i32(t0, t1, t0); break;
default: return;
}
}
@@ -3560,125 +3632,178 @@ static inline void gen_neon_rsb(int size)
#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
-/* FIXME: This is wrong. They set the wrong overflow bit. */
-#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c)
-#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c)
-#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c)
-#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c)
-
-#define GEN_NEON_INTEGER_OP_ENV(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
- break; \
- default: return 1; \
- }} while (0)
-
#define GEN_NEON_INTEGER_OP(name) do { \
switch ((size << 1) | u) { \
case 0: \
- gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
break; \
case 1: \
- gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
break; \
case 2: \
- gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
break; \
case 3: \
- gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
break; \
case 4: \
- gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
break; \
case 5: \
- gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \
+ gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
break; \
default: return 1; \
}} while (0)
-static inline void
-gen_neon_movl_scratch_T0(int scratch)
+static TCGv neon_load_scratch(int scratch)
{
- uint32_t offset;
-
- offset = offsetof(CPUARMState, vfp.scratch[scratch]);
- tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
+ TCGv tmp = tcg_temp_new_i32();
+ tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
+ return tmp;
}
-static inline void
-gen_neon_movl_scratch_T1(int scratch)
+static void neon_store_scratch(int scratch, TCGv var)
{
- uint32_t offset;
-
- offset = offsetof(CPUARMState, vfp.scratch[scratch]);
- tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
+ tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
+ tcg_temp_free_i32(var);
}
-static inline void
-gen_neon_movl_T0_scratch(int scratch)
+static inline TCGv neon_get_scalar(int size, int reg)
{
- uint32_t offset;
-
- offset = offsetof(CPUARMState, vfp.scratch[scratch]);
- tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
+ TCGv tmp;
+ if (size == 1) {
+ tmp = neon_load_reg(reg & 7, reg >> 4);
+ if (reg & 8) {
+ gen_neon_dup_high16(tmp);
+ } else {
+ gen_neon_dup_low16(tmp);
+ }
+ } else {
+ tmp = neon_load_reg(reg & 15, reg >> 4);
+ }
+ return tmp;
}
-static inline void
-gen_neon_movl_T1_scratch(int scratch)
+static int gen_neon_unzip(int rd, int rm, int size, int q)
{
- uint32_t offset;
-
- offset = offsetof(CPUARMState, vfp.scratch[scratch]);
- tcg_gen_ld_i32(cpu_T[1], cpu_env, offset);
+ TCGv tmp, tmp2;
+ if (!q && size == 2) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+ if (q) {
+ switch (size) {
+ case 0:
+ gen_helper_neon_qunzip8(tmp, tmp2);
+ break;
+ case 1:
+ gen_helper_neon_qunzip16(tmp, tmp2);
+ break;
+ case 2:
+ gen_helper_neon_qunzip32(tmp, tmp2);
+ break;
+ default:
+ abort();
+ }
+ } else {
+ switch (size) {
+ case 0:
+ gen_helper_neon_unzip8(tmp, tmp2);
+ break;
+ case 1:
+ gen_helper_neon_unzip16(tmp, tmp2);
+ break;
+ default:
+ abort();
+ }
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ return 0;
}
-static inline void gen_neon_get_scalar(int size, int reg)
+static int gen_neon_zip(int rd, int rm, int size, int q)
{
- if (size == 1) {
- NEON_GET_REG(T0, reg >> 1, reg & 1);
+ TCGv tmp, tmp2;
+ if (!q && size == 2) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+ if (q) {
+ switch (size) {
+ case 0:
+ gen_helper_neon_qzip8(tmp, tmp2);
+ break;
+ case 1:
+ gen_helper_neon_qzip16(tmp, tmp2);
+ break;
+ case 2:
+ gen_helper_neon_qzip32(tmp, tmp2);
+ break;
+ default:
+ abort();
+ }
} else {
- NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
- if (reg & 1)
- gen_neon_dup_low16(cpu_T[0]);
- else
- gen_neon_dup_high16(cpu_T[0]);
+ switch (size) {
+ case 0:
+ gen_helper_neon_zip8(tmp, tmp2);
+ break;
+ case 1:
+ gen_helper_neon_zip16(tmp, tmp2);
+ break;
+ default:
+ abort();
+ }
}
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ return 0;
}
-static void gen_neon_unzip(int reg, int q, int tmp, int size)
+static void gen_neon_trn_u8(TCGv t0, TCGv t1)
{
- int n;
+ TCGv rd, tmp;
- for (n = 0; n < q + 1; n += 2) {
- NEON_GET_REG(T0, reg, n);
- NEON_GET_REG(T0, reg, n + n);
- switch (size) {
- case 0: gen_helper_neon_unzip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */
- case 2: /* no-op */; break;
- default: abort();
- }
- gen_neon_movl_scratch_T0(tmp + n);
- gen_neon_movl_scratch_T1(tmp + n + 1);
- }
+ rd = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(rd, t0, 8);
+ tcg_gen_andi_i32(rd, rd, 0xff00ff00);
+ tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(t1, t1, 8);
+ tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
+ tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(rd);
}
+static void gen_neon_trn_u16(TCGv t0, TCGv t1)
+{
+ TCGv rd, tmp;
+
+ rd = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(rd, t0, 16);
+ tcg_gen_andi_i32(tmp, t1, 0xffff);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shri_i32(t1, t1, 16);
+ tcg_gen_andi_i32(tmp, t0, 0xffff0000);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(rd);
+}
+
+
static struct {
int nregs;
int interleave;
@@ -3705,17 +3830,18 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
int op;
int nregs;
int interleave;
+ int spacing;
int stride;
int size;
int reg;
int pass;
int load;
int shift;
- int n;
+ TCGv addr;
TCGv tmp;
TCGv tmp2;
- if (!vfp_enabled(env))
+ if (!s->vfp_enabled)
return 1;
VFP_DREG_D(rd, insn);
rn = (insn >> 16) & 0xf;
@@ -3725,119 +3851,86 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
/* Load store all elements. */
op = (insn >> 8) & 0xf;
size = (insn >> 6) & 3;
- if (op > 10 || size == 3)
+ if (op > 10)
return 1;
+ /* Catch UNDEF cases for bad values of align field */
+ switch (op & 0xc) {
+ case 4:
+ if (((insn >> 5) & 1) == 1) {
+ return 1;
+ }
+ break;
+ case 8:
+ if (((insn >> 4) & 3) == 3) {
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
nregs = neon_ls_element_type[op].nregs;
interleave = neon_ls_element_type[op].interleave;
- gen_movl_T1_reg(s, rn);
- stride = (1 << size) * interleave;
- for (reg = 0; reg < nregs; reg++) {
- if (interleave > 2 || (interleave == 2 && nregs == 2)) {
- gen_movl_T1_reg(s, rn);
- gen_op_addl_T1_im((1 << size) * reg);
- } else if (interleave == 2 && nregs == 4 && reg == 2) {
- gen_movl_T1_reg(s, rn);
- gen_op_addl_T1_im(1 << size);
- }
- for (pass = 0; pass < 2; pass++) {
- if (size == 2) {
- if (load) {
- tmp = gen_ld32(cpu_T[1], IS_USER(s));
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- gen_st32(tmp, cpu_T[1], IS_USER(s));
- }
- gen_op_addl_T1_im(stride);
- } else if (size == 1) {
- if (load) {
- tmp = gen_ld16u(cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
- dead_tmp(tmp2);
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- tmp2 = new_tmp();
- tcg_gen_shri_i32(tmp2, tmp, 16);
- gen_st16(tmp, cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- gen_st16(tmp2, cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- }
- } else /* size == 0 */ {
- if (load) {
- TCGV_UNUSED(tmp2);
- for (n = 0; n < 4; n++) {
- tmp = gen_ld8u(cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- if (n == 0) {
- tmp2 = tmp;
- } else {
- gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
- dead_tmp(tmp);
- }
- }
- neon_store_reg(rd, pass, tmp2);
- } else {
- tmp2 = neon_load_reg(rd, pass);
- for (n = 0; n < 4; n++) {
- tmp = new_tmp();
- if (n == 0) {
- tcg_gen_mov_i32(tmp, tmp2);
- } else {
- tcg_gen_shri_i32(tmp, tmp2, n * 8);
- }
- gen_st8(tmp, cpu_T[1], IS_USER(s));
- gen_op_addl_T1_im(stride);
- }
- dead_tmp(tmp2);
- }
- }
- }
- rd += neon_ls_element_type[op].spacing;
+ spacing = neon_ls_element_type[op].spacing;
+ if (size == 3 && (interleave | spacing) != 1) {
+ return 1;
}
+ addr = tcg_const_i32(insn);
+ gen_helper_neon_vldst_all(addr);
+ tcg_temp_free_i32(addr);
stride = nregs * 8;
} else {
size = (insn >> 10) & 3;
if (size == 3) {
/* Load single element to all lanes. */
- if (!load)
+ int a = (insn >> 4) & 1;
+ if (!load) {
return 1;
+ }
size = (insn >> 6) & 3;
nregs = ((insn >> 8) & 3) + 1;
- stride = (insn & (1 << 5)) ? 2 : 1;
- gen_movl_T1_reg(s, rn);
- for (reg = 0; reg < nregs; reg++) {
- switch (size) {
- case 0:
- tmp = gen_ld8u(cpu_T[1], IS_USER(s));
- gen_neon_dup_u8(tmp, 0);
- break;
- case 1:
- tmp = gen_ld16u(cpu_T[1], IS_USER(s));
- gen_neon_dup_low16(tmp);
- break;
- case 2:
- tmp = gen_ld32(cpu_T[1], IS_USER(s));
- break;
- case 3:
+
+ if (size == 3) {
+ if (nregs != 4 || a == 0) {
return 1;
- default: /* Avoid compiler warnings. */
- abort();
}
- gen_op_addl_T1_im(1 << size);
- tmp2 = new_tmp();
- tcg_gen_mov_i32(tmp2, tmp);
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp);
- rd += stride;
+ /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
+ size = 2;
+ }
+ if (nregs == 1 && a == 1 && size == 0) {
+ return 1;
+ }
+ if (nregs == 3 && a == 1) {
+ return 1;
}
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, rn);
+ if (nregs == 1) {
+ /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
+ tmp = gen_load_and_replicate(s, addr, size);
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
+ if (insn & (1 << 5)) {
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
+ }
+ tcg_temp_free_i32(tmp);
+ } else {
+ /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
+ stride = (insn & (1 << 5)) ? 2 : 1;
+ for (reg = 0; reg < nregs; reg++) {
+ tmp = gen_load_and_replicate(s, addr, size);
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
+ tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
+ tcg_temp_free_i32(tmp);
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+ rd += stride;
+ }
+ }
+ tcg_temp_free_i32(addr);
stride = (1 << size) * nregs;
} else {
/* Single element. */
+ int idx = (insn >> 4) & 0xf;
pass = (insn >> 7) & 1;
switch (size) {
case 0:
@@ -3856,18 +3949,52 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
abort();
}
nregs = ((insn >> 8) & 3) + 1;
- gen_movl_T1_reg(s, rn);
+ /* Catch the UNDEF cases. This is unavoidably a bit messy. */
+ switch (nregs) {
+ case 1:
+ if (((idx & (1 << size)) != 0) ||
+ (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
+ return 1;
+ }
+ break;
+ case 3:
+ if ((idx & 1) != 0) {
+ return 1;
+ }
+ /* fall through */
+ case 2:
+ if (size == 2 && (idx & 2) != 0) {
+ return 1;
+ }
+ break;
+ case 4:
+ if ((size == 2) && ((idx & 3) == 3)) {
+ return 1;
+ }
+ break;
+ default:
+ abort();
+ }
+ if ((rd + stride * (nregs - 1)) > 31) {
+ /* Attempts to write off the end of the register file
+ * are UNPREDICTABLE; we choose to UNDEF because otherwise
+ * the neon_load_reg() would write off the end of the array.
+ */
+ return 1;
+ }
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, rn);
for (reg = 0; reg < nregs; reg++) {
if (load) {
switch (size) {
case 0:
- tmp = gen_ld8u(cpu_T[1], IS_USER(s));
+ tmp = gen_ld8u(addr, IS_USER(s));
break;
case 1:
- tmp = gen_ld16u(cpu_T[1], IS_USER(s));
+ tmp = gen_ld16u(addr, IS_USER(s));
break;
case 2:
- tmp = gen_ld32(cpu_T[1], IS_USER(s));
+ tmp = gen_ld32(addr, IS_USER(s));
break;
default: /* Avoid compiler warnings. */
abort();
@@ -3875,7 +4002,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (size != 2) {
tmp2 = neon_load_reg(rd, pass);
gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
neon_store_reg(rd, pass, tmp);
} else { /* Store */
@@ -3884,19 +4011,20 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
tcg_gen_shri_i32(tmp, tmp, shift);
switch (size) {
case 0:
- gen_st8(tmp, cpu_T[1], IS_USER(s));
+ gen_st8(tmp, addr, IS_USER(s));
break;
case 1:
- gen_st16(tmp, cpu_T[1], IS_USER(s));
+ gen_st16(tmp, addr, IS_USER(s));
break;
case 2:
- gen_st32(tmp, cpu_T[1], IS_USER(s));
+ gen_st32(tmp, addr, IS_USER(s));
break;
}
}
rd += stride;
- gen_op_addl_T1_im(1 << size);
+ tcg_gen_addi_i32(addr, addr, 1 << size);
}
+ tcg_temp_free_i32(addr);
stride = nregs * (1 << size);
}
}
@@ -3910,7 +4038,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
TCGv index;
index = load_reg(s, rm);
tcg_gen_add_i32(base, base, index);
- dead_tmp(index);
+ tcg_temp_free_i32(index);
}
store_reg(s, rn, base);
}
@@ -3921,7 +4049,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
{
tcg_gen_and_i32(t, t, c);
- tcg_gen_bic_i32(f, f, c);
+ tcg_gen_andc_i32(f, f, c);
tcg_gen_or_i32(dest, t, f);
}
@@ -3938,9 +4066,9 @@ static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
{
switch (size) {
- case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
+ case 0: gen_helper_neon_narrow_sat_s8(dest, src); break;
+ case 1: gen_helper_neon_narrow_sat_s16(dest, src); break;
+ case 2: gen_helper_neon_narrow_sat_s32(dest, src); break;
default: abort();
}
}
@@ -3948,9 +4076,19 @@ static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
{
switch (size) {
- case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
+ case 0: gen_helper_neon_narrow_sat_u8(dest, src); break;
+ case 1: gen_helper_neon_narrow_sat_u16(dest, src); break;
+ case 2: gen_helper_neon_narrow_sat_u32(dest, src); break;
+ default: abort();
+ }
+}
+
+static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
+{
+ switch (size) {
+ case 0: gen_helper_neon_unarrow_sat8(dest, src); break;
+ case 1: gen_helper_neon_unarrow_sat16(dest, src); break;
+ case 2: gen_helper_neon_unarrow_sat32(dest, src); break;
default: abort();
}
}
@@ -3975,8 +4113,8 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
} else {
if (u) {
switch (size) {
- case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
+ case 1: gen_helper_neon_shl_u16(var, var, shift); break;
+ case 2: gen_helper_neon_shl_u32(var, var, shift); break;
default: abort();
}
} else {
@@ -4006,7 +4144,7 @@ static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
default: abort();
}
}
- dead_tmp(src);
+ tcg_temp_free_i32(src);
}
static inline void gen_neon_addl(int size)
@@ -4042,8 +4180,8 @@ static inline void gen_neon_negl(TCGv_i64 var, int size)
static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
{
switch (size) {
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
+ case 1: gen_helper_neon_addl_saturate_s32(op0, op0, op1); break;
+ case 2: gen_helper_neon_addl_saturate_s64(op0, op0, op1); break;
default: abort();
}
}
@@ -4060,19 +4198,216 @@ static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
case 4:
tmp = gen_muls_i64_i32(a, b);
tcg_gen_mov_i64(dest, tmp);
+ tcg_temp_free_i64(tmp);
break;
case 5:
tmp = gen_mulu_i64_i32(a, b);
tcg_gen_mov_i64(dest, tmp);
+ tcg_temp_free_i64(tmp);
break;
default: abort();
}
+
+ /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
+ Don't forget to clean them now. */
if (size < 2) {
- dead_tmp(b);
- dead_tmp(a);
+ tcg_temp_free_i32(a);
+ tcg_temp_free_i32(b);
+ }
+}
+
+static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
+{
+ if (op) {
+ if (u) {
+ gen_neon_unarrow_sats(size, dest, src);
+ } else {
+ gen_neon_narrow(size, dest, src);
+ }
+ } else {
+ if (u) {
+ gen_neon_narrow_satu(size, dest, src);
+ } else {
+ gen_neon_narrow_sats(size, dest, src);
+ }
}
}
+/* Symbolic constants for op fields for Neon 3-register same-length.
+ * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
+ * table A7-9.
+ */
+#define NEON_3R_VHADD 0
+#define NEON_3R_VQADD 1
+#define NEON_3R_VRHADD 2
+#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
+#define NEON_3R_VHSUB 4
+#define NEON_3R_VQSUB 5
+#define NEON_3R_VCGT 6
+#define NEON_3R_VCGE 7
+#define NEON_3R_VSHL 8
+#define NEON_3R_VQSHL 9
+#define NEON_3R_VRSHL 10
+#define NEON_3R_VQRSHL 11
+#define NEON_3R_VMAX 12
+#define NEON_3R_VMIN 13
+#define NEON_3R_VABD 14
+#define NEON_3R_VABA 15
+#define NEON_3R_VADD_VSUB 16
+#define NEON_3R_VTST_VCEQ 17
+#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
+#define NEON_3R_VMUL 19
+#define NEON_3R_VPMAX 20
+#define NEON_3R_VPMIN 21
+#define NEON_3R_VQDMULH_VQRDMULH 22
+#define NEON_3R_VPADD 23
+#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
+#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
+#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
+#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
+#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
+#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
+
+static const uint8_t neon_3r_sizes[] = {
+ [NEON_3R_VHADD] = 0x7,
+ [NEON_3R_VQADD] = 0xf,
+ [NEON_3R_VRHADD] = 0x7,
+ [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
+ [NEON_3R_VHSUB] = 0x7,
+ [NEON_3R_VQSUB] = 0xf,
+ [NEON_3R_VCGT] = 0x7,
+ [NEON_3R_VCGE] = 0x7,
+ [NEON_3R_VSHL] = 0xf,
+ [NEON_3R_VQSHL] = 0xf,
+ [NEON_3R_VRSHL] = 0xf,
+ [NEON_3R_VQRSHL] = 0xf,
+ [NEON_3R_VMAX] = 0x7,
+ [NEON_3R_VMIN] = 0x7,
+ [NEON_3R_VABD] = 0x7,
+ [NEON_3R_VABA] = 0x7,
+ [NEON_3R_VADD_VSUB] = 0xf,
+ [NEON_3R_VTST_VCEQ] = 0x7,
+ [NEON_3R_VML] = 0x7,
+ [NEON_3R_VMUL] = 0x7,
+ [NEON_3R_VPMAX] = 0x7,
+ [NEON_3R_VPMIN] = 0x7,
+ [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
+ [NEON_3R_VPADD] = 0x7,
+ [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
+ [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
+ [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
+ [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
+ [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
+ [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
+};
+
+/* Symbolic constants for op fields for Neon 2-register miscellaneous.
+ * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
+ * table A7-13.
+ */
+#define NEON_2RM_VREV64 0
+#define NEON_2RM_VREV32 1
+#define NEON_2RM_VREV16 2
+#define NEON_2RM_VPADDL 4
+#define NEON_2RM_VPADDL_U 5
+#define NEON_2RM_VCLS 8
+#define NEON_2RM_VCLZ 9
+#define NEON_2RM_VCNT 10
+#define NEON_2RM_VMVN 11
+#define NEON_2RM_VPADAL 12
+#define NEON_2RM_VPADAL_U 13
+#define NEON_2RM_VQABS 14
+#define NEON_2RM_VQNEG 15
+#define NEON_2RM_VCGT0 16
+#define NEON_2RM_VCGE0 17
+#define NEON_2RM_VCEQ0 18
+#define NEON_2RM_VCLE0 19
+#define NEON_2RM_VCLT0 20
+#define NEON_2RM_VABS 22
+#define NEON_2RM_VNEG 23
+#define NEON_2RM_VCGT0_F 24
+#define NEON_2RM_VCGE0_F 25
+#define NEON_2RM_VCEQ0_F 26
+#define NEON_2RM_VCLE0_F 27
+#define NEON_2RM_VCLT0_F 28
+#define NEON_2RM_VABS_F 30
+#define NEON_2RM_VNEG_F 31
+#define NEON_2RM_VSWP 32
+#define NEON_2RM_VTRN 33
+#define NEON_2RM_VUZP 34
+#define NEON_2RM_VZIP 35
+#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
+#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
+#define NEON_2RM_VSHLL 38
+#define NEON_2RM_VCVT_F16_F32 44
+#define NEON_2RM_VCVT_F32_F16 46
+#define NEON_2RM_VRECPE 56
+#define NEON_2RM_VRSQRTE 57
+#define NEON_2RM_VRECPE_F 58
+#define NEON_2RM_VRSQRTE_F 59
+#define NEON_2RM_VCVT_FS 60
+#define NEON_2RM_VCVT_FU 61
+#define NEON_2RM_VCVT_SF 62
+#define NEON_2RM_VCVT_UF 63
+
+static int neon_2rm_is_float_op(int op)
+{
+ /* Return true if this neon 2reg-misc op is float-to-float */
+ return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
+ op >= NEON_2RM_VRECPE_F);
+}
+
+/* Each entry in this array has bit n set if the insn allows
+ * size value n (otherwise it will UNDEF). Since unallocated
+ * op values will have no bits set they always UNDEF.
+ */
+static const uint8_t neon_2rm_sizes[] = {
+ [NEON_2RM_VREV64] = 0x7,
+ [NEON_2RM_VREV32] = 0x3,
+ [NEON_2RM_VREV16] = 0x1,
+ [NEON_2RM_VPADDL] = 0x7,
+ [NEON_2RM_VPADDL_U] = 0x7,
+ [NEON_2RM_VCLS] = 0x7,
+ [NEON_2RM_VCLZ] = 0x7,
+ [NEON_2RM_VCNT] = 0x1,
+ [NEON_2RM_VMVN] = 0x1,
+ [NEON_2RM_VPADAL] = 0x7,
+ [NEON_2RM_VPADAL_U] = 0x7,
+ [NEON_2RM_VQABS] = 0x7,
+ [NEON_2RM_VQNEG] = 0x7,
+ [NEON_2RM_VCGT0] = 0x7,
+ [NEON_2RM_VCGE0] = 0x7,
+ [NEON_2RM_VCEQ0] = 0x7,
+ [NEON_2RM_VCLE0] = 0x7,
+ [NEON_2RM_VCLT0] = 0x7,
+ [NEON_2RM_VABS] = 0x7,
+ [NEON_2RM_VNEG] = 0x7,
+ [NEON_2RM_VCGT0_F] = 0x4,
+ [NEON_2RM_VCGE0_F] = 0x4,
+ [NEON_2RM_VCEQ0_F] = 0x4,
+ [NEON_2RM_VCLE0_F] = 0x4,
+ [NEON_2RM_VCLT0_F] = 0x4,
+ [NEON_2RM_VABS_F] = 0x4,
+ [NEON_2RM_VNEG_F] = 0x4,
+ [NEON_2RM_VSWP] = 0x1,
+ [NEON_2RM_VTRN] = 0x7,
+ [NEON_2RM_VUZP] = 0x7,
+ [NEON_2RM_VZIP] = 0x7,
+ [NEON_2RM_VMOVN] = 0x7,
+ [NEON_2RM_VQMOVN] = 0x7,
+ [NEON_2RM_VSHLL] = 0x7,
+ [NEON_2RM_VCVT_F16_F32] = 0x2,
+ [NEON_2RM_VCVT_F32_F16] = 0x2,
+ [NEON_2RM_VRECPE] = 0x4,
+ [NEON_2RM_VRSQRTE] = 0x4,
+ [NEON_2RM_VRECPE_F] = 0x4,
+ [NEON_2RM_VRSQRTE_F] = 0x4,
+ [NEON_2RM_VCVT_FS] = 0x4,
+ [NEON_2RM_VCVT_FU] = 0x4,
+ [NEON_2RM_VCVT_SF] = 0x4,
+ [NEON_2RM_VCVT_UF] = 0x4,
+};
+
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
We process data in a mixture of 32-bit and 64-bit chunks.
@@ -4089,14 +4424,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
int count;
int pairwise;
int u;
- int n;
- uint32_t imm;
- TCGv tmp;
- TCGv tmp2;
- TCGv tmp3;
+ uint32_t imm, mask;
+ TCGv tmp, tmp2, tmp3, tmp4, tmp5;
TCGv_i64 tmp64;
- if (!vfp_enabled(env))
+ if (!s->vfp_enabled)
return 1;
q = (insn & (1 << 6)) != 0;
u = (insn >> 24) & 1;
@@ -4107,60 +4439,65 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if ((insn & (1 << 23)) == 0) {
/* Three register same length. */
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
- || op == 10 || op == 11 || op == 16)) {
- /* 64-bit element instructions. */
+ /* Catch invalid op and bad size combinations: UNDEF */
+ if ((neon_3r_sizes[op] & (1 << size)) == 0) {
+ return 1;
+ }
+ /* All insns of this form UNDEF for either this condition or the
+ * superset of cases "Q==1"; we catch the latter later.
+ */
+ if (q && ((rd | rn | rm) & 1)) {
+ return 1;
+ }
+ if (size == 3 && op != NEON_3R_LOGIC) {
+ /* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
neon_load_reg64(cpu_V0, rn + pass);
neon_load_reg64(cpu_V1, rm + pass);
switch (op) {
- case 1: /* VQADD */
+ case NEON_3R_VQADD:
if (u) {
- gen_helper_neon_add_saturate_u64(CPU_V001);
+ gen_helper_neon_qadd_u64(cpu_V0, cpu_V0, cpu_V1);
} else {
- gen_helper_neon_add_saturate_s64(CPU_V001);
+ gen_helper_neon_qadd_s64(cpu_V0, cpu_V0, cpu_V1);
}
break;
- case 5: /* VQSUB */
+ case NEON_3R_VQSUB:
if (u) {
- gen_helper_neon_sub_saturate_u64(CPU_V001);
+ gen_helper_neon_qsub_u64(cpu_V0, cpu_V0, cpu_V1);
} else {
- gen_helper_neon_sub_saturate_s64(CPU_V001);
+ gen_helper_neon_qsub_s64(cpu_V0, cpu_V0, cpu_V1);
}
break;
- case 8: /* VSHL */
+ case NEON_3R_VSHL:
if (u) {
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
} else {
gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
}
break;
- case 9: /* VQSHL */
+ case NEON_3R_VQSHL:
if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V0);
+ gen_helper_neon_qshl_u64(cpu_V0, cpu_V1, cpu_V0);
} else {
- gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
- cpu_V1, cpu_V0);
+ gen_helper_neon_qshl_s64(cpu_V0, cpu_V1, cpu_V0);
}
break;
- case 10: /* VRSHL */
+ case NEON_3R_VRSHL:
if (u) {
gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
} else {
gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
}
break;
- case 11: /* VQRSHL */
+ case NEON_3R_VQRSHL:
if (u) {
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
+ gen_helper_neon_qrshl_u64(cpu_V0, cpu_V1, cpu_V0);
} else {
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
+ gen_helper_neon_qrshl_s64(cpu_V0, cpu_V1, cpu_V0);
}
break;
- case 16:
+ case NEON_3R_VADD_VSUB:
if (u) {
tcg_gen_sub_i64(CPU_V001);
} else {
@@ -4174,301 +4511,327 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
return 0;
}
+ pairwise = 0;
switch (op) {
- case 8: /* VSHL */
- case 9: /* VQSHL */
- case 10: /* VRSHL */
- case 11: /* VQRSHL */
+ case NEON_3R_VSHL:
+ case NEON_3R_VQSHL:
+ case NEON_3R_VRSHL:
+ case NEON_3R_VQRSHL:
{
int rtmp;
/* Shift instruction operands are reversed. */
rtmp = rn;
rn = rm;
rm = rtmp;
- pairwise = 0;
}
break;
- case 20: /* VPMAX */
- case 21: /* VPMIN */
- case 23: /* VPADD */
+ case NEON_3R_VPADD:
+ if (u) {
+ return 1;
+ }
+ /* Fall through */
+ case NEON_3R_VPMAX:
+ case NEON_3R_VPMIN:
pairwise = 1;
break;
- case 26: /* VPADD (float) */
- pairwise = (u && size < 2);
+ case NEON_3R_FLOAT_ARITH:
+ pairwise = (u && size < 2); /* if VPADD (float) */
+ break;
+ case NEON_3R_FLOAT_MINMAX:
+ pairwise = u; /* if VPMIN/VPMAX (float) */
+ break;
+ case NEON_3R_FLOAT_CMP:
+ if (!u && size) {
+ /* no encoding for U=0 C=1x */
+ return 1;
+ }
+ break;
+ case NEON_3R_FLOAT_ACMP:
+ if (!u) {
+ return 1;
+ }
+ break;
+ case NEON_3R_VRECPS_VRSQRTS:
+ if (u) {
+ return 1;
+ }
break;
- case 30: /* VPMIN/VPMAX (float) */
- pairwise = u;
+ case NEON_3R_VMUL:
+ if (u && (size != 0)) {
+ /* UNDEF on invalid size for polynomial subcase */
+ return 1;
+ }
break;
default:
- pairwise = 0;
break;
}
+
+ if (pairwise && q) {
+ /* All the pairwise insns UNDEF if Q is set */
+ return 1;
+ }
+
for (pass = 0; pass < (q ? 4 : 2); pass++) {
if (pairwise) {
/* Pairwise. */
- if (q)
- n = (pass & 1) * 2;
- else
- n = 0;
- if (pass < q + 1) {
- NEON_GET_REG(T0, rn, n);
- NEON_GET_REG(T1, rn, n + 1);
+ if (pass < 1) {
+ tmp = neon_load_reg(rn, 0);
+ tmp2 = neon_load_reg(rn, 1);
} else {
- NEON_GET_REG(T0, rm, n);
- NEON_GET_REG(T1, rm, n + 1);
+ tmp = neon_load_reg(rm, 0);
+ tmp2 = neon_load_reg(rm, 1);
}
} else {
/* Elementwise. */
- NEON_GET_REG(T0, rn, pass);
- NEON_GET_REG(T1, rm, pass);
+ tmp = neon_load_reg(rn, pass);
+ tmp2 = neon_load_reg(rm, pass);
}
switch (op) {
- case 0: /* VHADD */
+ case NEON_3R_VHADD:
GEN_NEON_INTEGER_OP(hadd);
break;
- case 1: /* VQADD */
- GEN_NEON_INTEGER_OP_ENV(qadd);
+ case NEON_3R_VQADD:
+ GEN_NEON_INTEGER_OP(qadd);
break;
- case 2: /* VRHADD */
+ case NEON_3R_VRHADD:
GEN_NEON_INTEGER_OP(rhadd);
break;
- case 3: /* Logic ops. */
+ case NEON_3R_LOGIC: /* Logic ops. */
switch ((u << 2) | size) {
case 0: /* VAND */
- gen_op_andl_T0_T1();
+ tcg_gen_and_i32(tmp, tmp, tmp2);
break;
case 1: /* BIC */
- gen_op_bicl_T0_T1();
+ tcg_gen_andc_i32(tmp, tmp, tmp2);
break;
case 2: /* VORR */
- gen_op_orl_T0_T1();
+ tcg_gen_or_i32(tmp, tmp, tmp2);
break;
case 3: /* VORN */
- gen_op_notl_T1();
- gen_op_orl_T0_T1();
+ tcg_gen_orc_i32(tmp, tmp, tmp2);
break;
case 4: /* VEOR */
- gen_op_xorl_T0_T1();
+ tcg_gen_xor_i32(tmp, tmp, tmp2);
break;
case 5: /* VBSL */
- tmp = neon_load_reg(rd, pass);
- gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
- dead_tmp(tmp);
+ tmp3 = neon_load_reg(rd, pass);
+ gen_neon_bsl(tmp, tmp, tmp2, tmp3);
+ tcg_temp_free_i32(tmp3);
break;
case 6: /* VBIT */
- tmp = neon_load_reg(rd, pass);
- gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
- dead_tmp(tmp);
+ tmp3 = neon_load_reg(rd, pass);
+ gen_neon_bsl(tmp, tmp, tmp3, tmp2);
+ tcg_temp_free_i32(tmp3);
break;
case 7: /* VBIF */
- tmp = neon_load_reg(rd, pass);
- gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
- dead_tmp(tmp);
+ tmp3 = neon_load_reg(rd, pass);
+ gen_neon_bsl(tmp, tmp3, tmp, tmp2);
+ tcg_temp_free_i32(tmp3);
break;
}
break;
- case 4: /* VHSUB */
+ case NEON_3R_VHSUB:
GEN_NEON_INTEGER_OP(hsub);
break;
- case 5: /* VQSUB */
- GEN_NEON_INTEGER_OP_ENV(qsub);
+ case NEON_3R_VQSUB:
+ GEN_NEON_INTEGER_OP(qsub);
break;
- case 6: /* VCGT */
+ case NEON_3R_VCGT:
GEN_NEON_INTEGER_OP(cgt);
break;
- case 7: /* VCGE */
+ case NEON_3R_VCGE:
GEN_NEON_INTEGER_OP(cge);
break;
- case 8: /* VSHL */
+ case NEON_3R_VSHL:
GEN_NEON_INTEGER_OP(shl);
break;
- case 9: /* VQSHL */
- GEN_NEON_INTEGER_OP_ENV(qshl);
+ case NEON_3R_VQSHL:
+ GEN_NEON_INTEGER_OP(qshl);
break;
- case 10: /* VRSHL */
+ case NEON_3R_VRSHL:
GEN_NEON_INTEGER_OP(rshl);
break;
- case 11: /* VQRSHL */
- GEN_NEON_INTEGER_OP_ENV(qrshl);
+ case NEON_3R_VQRSHL:
+ GEN_NEON_INTEGER_OP(qrshl);
break;
- case 12: /* VMAX */
+ case NEON_3R_VMAX:
GEN_NEON_INTEGER_OP(max);
break;
- case 13: /* VMIN */
+ case NEON_3R_VMIN:
GEN_NEON_INTEGER_OP(min);
break;
- case 14: /* VABD */
+ case NEON_3R_VABD:
GEN_NEON_INTEGER_OP(abd);
break;
- case 15: /* VABA */
+ case NEON_3R_VABA:
GEN_NEON_INTEGER_OP(abd);
- NEON_GET_REG(T1, rd, pass);
- gen_neon_add(size);
+ tcg_temp_free_i32(tmp2);
+ tmp2 = neon_load_reg(rd, pass);
+ gen_neon_add(size, tmp, tmp2);
break;
- case 16:
+ case NEON_3R_VADD_VSUB:
if (!u) { /* VADD */
- if (gen_neon_add(size))
- return 1;
+ gen_neon_add(size, tmp, tmp2);
} else { /* VSUB */
switch (size) {
- case 0: gen_helper_neon_sub_u8(CPU_T001); break;
- case 1: gen_helper_neon_sub_u16(CPU_T001); break;
- case 2: gen_op_subl_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
+ case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
+ default: abort();
}
}
break;
- case 17:
+ case NEON_3R_VTST_VCEQ:
if (!u) { /* VTST */
switch (size) {
- case 0: gen_helper_neon_tst_u8(CPU_T001); break;
- case 1: gen_helper_neon_tst_u16(CPU_T001); break;
- case 2: gen_helper_neon_tst_u32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
+ default: abort();
}
} else { /* VCEQ */
switch (size) {
- case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
- case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
- case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
+ default: abort();
}
}
break;
- case 18: /* Multiply. */
+ case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
switch (size) {
- case 0: gen_helper_neon_mul_u8(CPU_T001); break;
- case 1: gen_helper_neon_mul_u16(CPU_T001); break;
- case 2: gen_op_mul_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+ case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+ default: abort();
}
- NEON_GET_REG(T1, rd, pass);
+ tcg_temp_free_i32(tmp2);
+ tmp2 = neon_load_reg(rd, pass);
if (u) { /* VMLS */
- gen_neon_rsb(size);
+ gen_neon_rsb(size, tmp, tmp2);
} else { /* VMLA */
- gen_neon_add(size);
+ gen_neon_add(size, tmp, tmp2);
}
break;
- case 19: /* VMUL */
+ case NEON_3R_VMUL:
if (u) { /* polynomial */
- gen_helper_neon_mul_p8(CPU_T001);
+ gen_helper_neon_mul_p8(tmp, tmp, tmp2);
} else { /* Integer */
switch (size) {
- case 0: gen_helper_neon_mul_u8(CPU_T001); break;
- case 1: gen_helper_neon_mul_u16(CPU_T001); break;
- case 2: gen_op_mul_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+ case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+ default: abort();
}
}
break;
- case 20: /* VPMAX */
+ case NEON_3R_VPMAX:
GEN_NEON_INTEGER_OP(pmax);
break;
- case 21: /* VPMIN */
+ case NEON_3R_VPMIN:
GEN_NEON_INTEGER_OP(pmin);
break;
- case 22: /* Hultiply high. */
+ case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
if (!u) { /* VQDMULH */
switch (size) {
- case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
- case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
- default: return 1;
+ case 1: gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2); break;
+ default: abort();
}
- } else { /* VQRDHMUL */
+ } else { /* VQRDMULH */
switch (size) {
- case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
- case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
- default: return 1;
+ case 1: gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2); break;
+ default: abort();
}
}
break;
- case 23: /* VPADD */
- if (u)
- return 1;
+ case NEON_3R_VPADD:
switch (size) {
- case 0: gen_helper_neon_padd_u8(CPU_T001); break;
- case 1: gen_helper_neon_padd_u16(CPU_T001); break;
- case 2: gen_op_addl_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
+ case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
+ default: abort();
}
break;
- case 26: /* Floating point arithnetic. */
+ case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
switch ((u << 2) | size) {
case 0: /* VADD */
- gen_helper_neon_add_f32(CPU_T001);
+ gen_helper_neon_add_f32(tmp, tmp, tmp2);
break;
case 2: /* VSUB */
- gen_helper_neon_sub_f32(CPU_T001);
+ gen_helper_neon_sub_f32(tmp, tmp, tmp2);
break;
case 4: /* VPADD */
- gen_helper_neon_add_f32(CPU_T001);
+ gen_helper_neon_add_f32(tmp, tmp, tmp2);
break;
case 6: /* VABD */
- gen_helper_neon_abd_f32(CPU_T001);
+ gen_helper_neon_abd_f32(tmp, tmp, tmp2);
break;
default:
- return 1;
+ abort();
}
break;
- case 27: /* Float multiply. */
- gen_helper_neon_mul_f32(CPU_T001);
+ case NEON_3R_FLOAT_MULTIPLY:
+ gen_helper_neon_mul_f32(tmp, tmp, tmp2);
if (!u) {
- NEON_GET_REG(T1, rd, pass);
+ tcg_temp_free_i32(tmp2);
+ tmp2 = neon_load_reg(rd, pass);
if (size == 0) {
- gen_helper_neon_add_f32(CPU_T001);
+ gen_helper_neon_add_f32(tmp, tmp, tmp2);
} else {
- gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
+ gen_helper_neon_sub_f32(tmp, tmp2, tmp);
}
}
break;
- case 28: /* Float compare. */
+ case NEON_3R_FLOAT_CMP:
if (!u) {
- gen_helper_neon_ceq_f32(CPU_T001);
+ gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
} else {
if (size == 0)
- gen_helper_neon_cge_f32(CPU_T001);
+ gen_helper_neon_cge_f32(tmp, tmp, tmp2);
else
- gen_helper_neon_cgt_f32(CPU_T001);
+ gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
}
break;
- case 29: /* Float compare absolute. */
- if (!u)
- return 1;
+ case NEON_3R_FLOAT_ACMP:
if (size == 0)
- gen_helper_neon_acge_f32(CPU_T001);
+ gen_helper_neon_acge_f32(tmp, tmp, tmp2);
else
- gen_helper_neon_acgt_f32(CPU_T001);
+ gen_helper_neon_acgt_f32(tmp, tmp, tmp2);
break;
- case 30: /* Float min/max. */
+ case NEON_3R_FLOAT_MINMAX:
if (size == 0)
- gen_helper_neon_max_f32(CPU_T001);
+ gen_helper_neon_max_f32(tmp, tmp, tmp2);
else
- gen_helper_neon_min_f32(CPU_T001);
+ gen_helper_neon_min_f32(tmp, tmp, tmp2);
break;
- case 31:
+ case NEON_3R_VRECPS_VRSQRTS:
if (size == 0)
- gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
+ gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
else
- gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
+ gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
break;
default:
abort();
}
+ tcg_temp_free_i32(tmp2);
+
/* Save the result. For elementwise operations we can put it
straight into the destination register. For pairwise operations
we have to be careful to avoid clobbering the source operands. */
if (pairwise && rd == rm) {
- gen_neon_movl_scratch_T0(pass);
+ neon_store_scratch(pass, tmp);
} else {
- NEON_SET_REG(T0, rd, pass);
+ neon_store_reg(rd, pass, tmp);
}
} /* for pass */
if (pairwise && rd == rm) {
for (pass = 0; pass < (q ? 4 : 2); pass++) {
- gen_neon_movl_T0_scratch(pass);
- NEON_SET_REG(T0, rd, pass);
+ tmp = neon_load_scratch(pass);
+ neon_store_reg(rd, pass, tmp);
}
}
/* End of 3 register same size operations. */
@@ -4477,7 +4840,10 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
/* Two registers and shift. */
op = (insn >> 8) & 0xf;
if (insn & (1 << 7)) {
- /* 64-bit shift. */
+ /* 64-bit shift. */
+ if (op > 7) {
+ return 1;
+ }
size = 3;
} else {
size = 2;
@@ -4490,6 +4856,12 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (op < 8) {
/* Shift by immediate:
VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
+ if (q && ((rd | rm) & 1)) {
+ return 1;
+ }
+ if (!u && (op == 4 || op == 6)) {
+ return 1;
+ }
/* Right shifts are encoded as N - shift, where N is the
element size in bits. */
if (op <= 4)
@@ -4537,36 +4909,47 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
break;
case 4: /* VSRI */
- if (!u)
- return 1;
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
- break;
case 5: /* VSHL, VSLI */
gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
break;
- case 6: /* VQSHL */
- if (u)
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
- else
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
+ case 6: /* VQSHLU */
+ gen_helper_neon_qshlu_s64(cpu_V0, cpu_V0, cpu_V1);
break;
- case 7: /* VQSHLU */
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
+ case 7: /* VQSHL */
+ if (u) {
+ gen_helper_neon_qshl_u64(cpu_V0,
+ cpu_V0, cpu_V1);
+ } else {
+ gen_helper_neon_qshl_s64(cpu_V0,
+ cpu_V0, cpu_V1);
+ }
break;
}
if (op == 1 || op == 3) {
/* Accumulate. */
- neon_load_reg64(cpu_V0, rd + pass);
+ neon_load_reg64(cpu_V1, rd + pass);
tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
} else if (op == 4 || (op == 5 && u)) {
/* Insert */
- cpu_abort(env, "VS[LR]I.64 not implemented");
+ neon_load_reg64(cpu_V1, rd + pass);
+ uint64_t mask;
+ if (shift < -63 || shift > 63) {
+ mask = 0;
+ } else {
+ if (op == 4) {
+ mask = 0xffffffffffffffffull >> -shift;
+ } else {
+ mask = 0xffffffffffffffffull << shift;
+ }
+ }
+ tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
+ tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
}
neon_store_reg64(cpu_V0, rd + pass);
} else { /* size < 3 */
/* Operands in T0 and T1. */
- gen_op_movl_T1_im(imm);
- NEON_GET_REG(T0, rm, pass);
+ tmp = neon_load_reg(rm, pass);
+ tmp2 = tcg_const_i32(imm);
switch (op) {
case 0: /* VSHR */
case 1: /* VSRA */
@@ -4577,138 +4960,158 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
GEN_NEON_INTEGER_OP(rshl);
break;
case 4: /* VSRI */
- if (!u)
- return 1;
- GEN_NEON_INTEGER_OP(shl);
- break;
case 5: /* VSHL, VSLI */
switch (size) {
- case 0: gen_helper_neon_shl_u8(CPU_T001); break;
- case 1: gen_helper_neon_shl_u16(CPU_T001); break;
- case 2: gen_helper_neon_shl_u32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
+ default: abort();
}
break;
- case 6: /* VQSHL */
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- case 7: /* VQSHLU */
+ case 6: /* VQSHLU */
switch (size) {
- case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;
- case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;
- case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break;
- default: return 1;
+ case 0:
+ gen_helper_neon_qshlu_s8(tmp, tmp, tmp2);
+ break;
+ case 1:
+ gen_helper_neon_qshlu_s16(tmp, tmp, tmp2);
+ break;
+ case 2:
+ gen_helper_neon_qshlu_s32(tmp, tmp, tmp2);
+ break;
+ default:
+ abort();
}
break;
+ case 7: /* VQSHL */
+ GEN_NEON_INTEGER_OP(qshl);
+ break;
}
+ tcg_temp_free_i32(tmp2);
if (op == 1 || op == 3) {
/* Accumulate. */
- NEON_GET_REG(T1, rd, pass);
- gen_neon_add(size);
+ tmp2 = neon_load_reg(rd, pass);
+ gen_neon_add(size, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
} else if (op == 4 || (op == 5 && u)) {
/* Insert */
switch (size) {
case 0:
if (op == 4)
- imm = 0xff >> -shift;
+ mask = 0xff >> -shift;
else
- imm = (uint8_t)(0xff << shift);
- imm |= imm << 8;
- imm |= imm << 16;
+ mask = (uint8_t)(0xff << shift);
+ mask |= mask << 8;
+ mask |= mask << 16;
break;
case 1:
if (op == 4)
- imm = 0xffff >> -shift;
+ mask = 0xffff >> -shift;
else
- imm = (uint16_t)(0xffff << shift);
- imm |= imm << 16;
+ mask = (uint16_t)(0xffff << shift);
+ mask |= mask << 16;
break;
case 2:
- if (op == 4)
- imm = 0xffffffffu >> -shift;
- else
- imm = 0xffffffffu << shift;
+ if (shift < -31 || shift > 31) {
+ mask = 0;
+ } else {
+ if (op == 4)
+ mask = 0xffffffffu >> -shift;
+ else
+ mask = 0xffffffffu << shift;
+ }
break;
default:
abort();
}
- tmp = neon_load_reg(rd, pass);
- tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
- tcg_gen_andi_i32(tmp, tmp, ~imm);
- tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
+ tmp2 = neon_load_reg(rd, pass);
+ tcg_gen_andi_i32(tmp, tmp, mask);
+ tcg_gen_andi_i32(tmp2, tmp2, ~mask);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
}
- NEON_SET_REG(T0, rd, pass);
+ neon_store_reg(rd, pass, tmp);
}
} /* for pass */
} else if (op < 10) {
/* Shift by immediate and narrow:
VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
+ int input_unsigned = (op == 8) ? !u : u;
+ if (rm & 1) {
+ return 1;
+ }
shift = shift - (1 << (size + 3));
size++;
- switch (size) {
- case 1:
- imm = (uint16_t)shift;
- imm |= imm << 16;
- tmp2 = tcg_const_i32(imm);
- TCGV_UNUSED_I64(tmp64);
- break;
- case 2:
- imm = (uint32_t)shift;
- tmp2 = tcg_const_i32(imm);
- TCGV_UNUSED_I64(tmp64);
- break;
- case 3:
+ if (size == 3) {
tmp64 = tcg_const_i64(shift);
- TCGV_UNUSED(tmp2);
- break;
- default:
- abort();
- }
-
- for (pass = 0; pass < 2; pass++) {
- if (size == 3) {
- neon_load_reg64(cpu_V0, rm + pass);
+ neon_load_reg64(cpu_V0, rm);
+ neon_load_reg64(cpu_V1, rm + 1);
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 in;
+ if (pass == 0) {
+ in = cpu_V0;
+ } else {
+ in = cpu_V1;
+ }
if (q) {
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
+ if (input_unsigned) {
+ gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
+ } else {
+ gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
+ }
} else {
- if (u)
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
- else
- gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
+ if (input_unsigned) {
+ gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
+ } else {
+ gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
+ }
}
+ tmp = tcg_temp_new_i32();
+ gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
+ neon_store_reg(rd, pass, tmp);
+ } /* for pass */
+ tcg_temp_free_i64(tmp64);
+ } else {
+ if (size == 1) {
+ imm = (uint16_t)shift;
+ imm |= imm << 16;
} else {
- tmp = neon_load_reg(rm + pass, 0);
- gen_neon_shift_narrow(size, tmp, tmp2, q, u);
- tmp3 = neon_load_reg(rm + pass, 1);
- gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
- tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
- dead_tmp(tmp);
- dead_tmp(tmp3);
- }
- tmp = new_tmp();
- if (op == 8 && !u) {
- gen_neon_narrow(size - 1, tmp, cpu_V0);
- } else {
- if (op == 8)
- gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
- else
- gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
- }
- if (pass == 0) {
- tmp2 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp);
+ /* size == 2 */
+ imm = (uint32_t)shift;
}
- } /* for pass */
+ tmp2 = tcg_const_i32(imm);
+ tmp4 = neon_load_reg(rm + 1, 0);
+ tmp5 = neon_load_reg(rm + 1, 1);
+ for (pass = 0; pass < 2; pass++) {
+ if (pass == 0) {
+ tmp = neon_load_reg(rm, 0);
+ } else {
+ tmp = tmp4;
+ }
+ gen_neon_shift_narrow(size, tmp, tmp2, q,
+ input_unsigned);
+ if (pass == 0) {
+ tmp3 = neon_load_reg(rm, 1);
+ } else {
+ tmp3 = tmp5;
+ }
+ gen_neon_shift_narrow(size, tmp3, tmp2, q,
+ input_unsigned);
+ tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp3);
+ tmp = tcg_temp_new_i32();
+ gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
+ neon_store_reg(rd, pass, tmp);
+ } /* for pass */
+ tcg_temp_free_i32(tmp2);
+ }
} else if (op == 10) {
- /* VSHLL */
- if (q || size == 3)
+ /* VSHLL, VMOVL */
+ if (q || (rd & 1)) {
return 1;
+ }
tmp = neon_load_reg(rm, 0);
tmp2 = neon_load_reg(rm, 1);
for (pass = 0; pass < 2; pass++) {
@@ -4721,34 +5124,53 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
/* The shift is less than the width of the source
type, so we can just shift the whole register. */
tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
+ /* Widen the result of shift: we need to clear
+ * the potential overflow bits resulting from
+ * left bits of the narrow input appearing as
+ * right bits of the left neighbour narrow
+ * input. */
if (size < 2 || !u) {
uint64_t imm64;
if (size == 0) {
imm = (0xffu >> (8 - shift));
imm |= imm << 16;
- } else {
+ } else if (size == 1) {
imm = 0xffff >> (16 - shift);
+ } else {
+ /* size == 2 */
+ imm = 0xffffffff >> (32 - shift);
+ }
+ if (size < 2) {
+ imm64 = imm | (((uint64_t)imm) << 32);
+ } else {
+ imm64 = imm;
}
- imm64 = imm | (((uint64_t)imm) << 32);
- tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
+ tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
}
}
neon_store_reg64(cpu_V0, rd + pass);
}
- } else if (op == 15 || op == 16) {
+ } else if (op >= 14) {
/* VCVT fixed-point. */
+ if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
+ return 1;
+ }
+ /* We have already masked out the must-be-1 top bit of imm6,
+ * hence this 32-shift where the ARM ARM has 64-imm6.
+ */
+ shift = 32 - shift;
for (pass = 0; pass < (q ? 4 : 2); pass++) {
tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
- if (op & 1) {
+ if (!(op & 1)) {
if (u)
- gen_vfp_ulto(0, shift);
+ gen_vfp_ulto(0, shift, 1);
else
- gen_vfp_slto(0, shift);
+ gen_vfp_slto(0, shift, 1);
} else {
if (u)
- gen_vfp_toul(0, shift);
+ gen_vfp_toul(0, shift, 1);
else
- gen_vfp_tosl(0, shift);
+ gen_vfp_tosl(0, shift, 1);
}
tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
}
@@ -4757,11 +5179,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
} else { /* (insn & 0x00380080) == 0 */
int invert;
+ if (q && (rd & 1)) {
+ return 1;
+ }
op = (insn >> 8) & 0xf;
/* One register and immediate. */
imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
invert = (insn & (1 << 5)) != 0;
+ /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
+ * We choose to not special-case this and will behave as if a
+ * valid constant encoding of 0 had been given.
+ */
switch (op) {
case 0: case 1:
/* no-op */
@@ -4782,7 +5211,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
imm = (imm << 8) | (imm << 24);
break;
case 12:
- imm = (imm < 8) | 0xff;
+ imm = (imm << 8) | 0xff;
break;
case 13:
imm = (imm << 16) | 0xffff;
@@ -4793,6 +5222,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
imm = ~imm;
break;
case 15:
+ if (invert) {
+ return 1;
+ }
imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
| ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
break;
@@ -4800,9 +5232,6 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (invert)
imm = ~imm;
- if (op != 14 || !invert)
- gen_op_movl_T1_im(imm);
-
for (pass = 0; pass < (q ? 4 : 2); pass++) {
if (op & 1 && op < 12) {
tmp = neon_load_reg(rd, pass);
@@ -4815,8 +5244,9 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
} else {
/* VMOV, VMVN. */
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
if (op == 14 && invert) {
+ int n;
uint32_t val;
val = 0;
for (n = 0; n < 4; n++) {
@@ -4839,41 +5269,57 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
int src1_wide;
int src2_wide;
int prewiden;
- /* prewiden, src1_wide, src2_wide */
- static const int neon_3reg_wide[16][3] = {
- {1, 0, 0}, /* VADDL */
- {1, 1, 0}, /* VADDW */
- {1, 0, 0}, /* VSUBL */
- {1, 1, 0}, /* VSUBW */
- {0, 1, 1}, /* VADDHN */
- {0, 0, 0}, /* VABAL */
- {0, 1, 1}, /* VSUBHN */
- {0, 0, 0}, /* VABDL */
- {0, 0, 0}, /* VMLAL */
- {0, 0, 0}, /* VQDMLAL */
- {0, 0, 0}, /* VMLSL */
- {0, 0, 0}, /* VQDMLSL */
- {0, 0, 0}, /* Integer VMULL */
- {0, 0, 0}, /* VQDMULL */
- {0, 0, 0} /* Polynomial VMULL */
+ /* undefreq: bit 0 : UNDEF if size != 0
+ * bit 1 : UNDEF if size == 0
+ * bit 2 : UNDEF if U == 1
+ * Note that [1:0] set implies 'always UNDEF'
+ */
+ int undefreq;
+ /* prewiden, src1_wide, src2_wide, undefreq */
+ static const int neon_3reg_wide[16][4] = {
+ {1, 0, 0, 0}, /* VADDL */
+ {1, 1, 0, 0}, /* VADDW */
+ {1, 0, 0, 0}, /* VSUBL */
+ {1, 1, 0, 0}, /* VSUBW */
+ {0, 1, 1, 0}, /* VADDHN */
+ {0, 0, 0, 0}, /* VABAL */
+ {0, 1, 1, 0}, /* VSUBHN */
+ {0, 0, 0, 0}, /* VABDL */
+ {0, 0, 0, 0}, /* VMLAL */
+ {0, 0, 0, 6}, /* VQDMLAL */
+ {0, 0, 0, 0}, /* VMLSL */
+ {0, 0, 0, 6}, /* VQDMLSL */
+ {0, 0, 0, 0}, /* Integer VMULL */
+ {0, 0, 0, 2}, /* VQDMULL */
+ {0, 0, 0, 5}, /* Polynomial VMULL */
+ {0, 0, 0, 3}, /* Reserved: always UNDEF */
};
prewiden = neon_3reg_wide[op][0];
src1_wide = neon_3reg_wide[op][1];
src2_wide = neon_3reg_wide[op][2];
+ undefreq = neon_3reg_wide[op][3];
- if (size == 0 && (op == 9 || op == 11 || op == 13))
+ if (((undefreq & 1) && (size != 0)) ||
+ ((undefreq & 2) && (size == 0)) ||
+ ((undefreq & 4) && u)) {
+ return 1;
+ }
+ if ((src1_wide && (rn & 1)) ||
+ (src2_wide && (rm & 1)) ||
+ (!src2_wide && (rd & 1))) {
return 1;
+ }
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
if (rd == rm && !src2_wide) {
- NEON_GET_REG(T0, rm, 1);
- gen_neon_movl_scratch_T0(2);
+ tmp = neon_load_reg(rm, 1);
+ neon_store_scratch(2, tmp);
} else if (rd == rn && !src1_wide) {
- NEON_GET_REG(T0, rn, 1);
- gen_neon_movl_scratch_T0(2);
+ tmp = neon_load_reg(rn, 1);
+ neon_store_scratch(2, tmp);
}
TCGV_UNUSED(tmp3);
for (pass = 0; pass < 2; pass++) {
@@ -4882,9 +5328,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
TCGV_UNUSED(tmp);
} else {
if (pass == 1 && rd == rn) {
- gen_neon_movl_T0_scratch(2);
- tmp = new_tmp();
- tcg_gen_mov_i32(tmp, cpu_T[0]);
+ tmp = neon_load_scratch(2);
} else {
tmp = neon_load_reg(rn, pass);
}
@@ -4897,9 +5341,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
TCGV_UNUSED(tmp2);
} else {
if (pass == 1 && rd == rm) {
- gen_neon_movl_T0_scratch(2);
- tmp2 = new_tmp();
- tcg_gen_mov_i32(tmp2, cpu_T[0]);
+ tmp2 = neon_load_scratch(2);
} else {
tmp2 = neon_load_reg(rm, pass);
}
@@ -4911,7 +5353,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
gen_neon_addl(size);
break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */
+ case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
gen_neon_subl(size);
break;
case 5: case 7: /* VABAL, VABDL */
@@ -4936,49 +5378,50 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
break;
default: abort();
}
- dead_tmp(tmp2);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
break;
case 8: case 9: case 10: case 11: case 12: case 13:
/* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
break;
case 14: /* Polynomial VMULL */
- cpu_abort(env, "Polynomial VMULL not implemented");
-
- default: /* 15 is RESERVED. */
- return 1;
+ gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
+ break;
+ default: /* 15 is RESERVED: caught earlier */
+ abort();
}
- if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
+ if (op == 13) {
+ /* VQDMULL */
+ gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
+ neon_store_reg64(cpu_V0, rd + pass);
+ } else if (op == 5 || (op >= 8 && op <= 11)) {
/* Accumulate. */
- if (op == 10 || op == 11) {
- gen_neon_negl(cpu_V0, size);
- }
-
- if (op != 13) {
- neon_load_reg64(cpu_V1, rd + pass);
- }
-
+ neon_load_reg64(cpu_V1, rd + pass);
switch (op) {
- case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
+ case 10: /* VMLSL */
+ gen_neon_negl(cpu_V0, size);
+ /* Fall through */
+ case 5: case 8: /* VABAL, VMLAL */
gen_neon_addl(size);
break;
case 9: case 11: /* VQDMLAL, VQDMLSL */
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
+ if (op == 11) {
+ gen_neon_negl(cpu_V0, size);
+ }
gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
break;
- /* Fall through. */
- case 13: /* VQDMULL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- break;
default:
abort();
}
neon_store_reg64(cpu_V0, rd + pass);
} else if (op == 4 || op == 6) {
/* Narrowing operation. */
- tmp = new_tmp();
- if (u) {
+ tmp = tcg_temp_new_i32();
+ if (!u) {
switch (size) {
case 0:
gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
@@ -5020,101 +5463,124 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
}
} else {
- /* Two registers and a scalar. */
+ /* Two registers and a scalar. NB that for ops of this form
+ * the ARM ARM labels bit 24 as Q, but it is in our variable
+ * 'u', not 'q'.
+ */
+ if (size == 0) {
+ return 1;
+ }
switch (op) {
- case 0: /* Integer VMLA scalar */
case 1: /* Float VMLA scalar */
- case 4: /* Integer VMLS scalar */
case 5: /* Floating point VMLS scalar */
- case 8: /* Integer VMUL scalar */
case 9: /* Floating point VMUL scalar */
+ if (size == 1) {
+ return 1;
+ }
+ /* fall through */
+ case 0: /* Integer VMLA scalar */
+ case 4: /* Integer VMLS scalar */
+ case 8: /* Integer VMUL scalar */
case 12: /* VQDMULH scalar */
case 13: /* VQRDMULH scalar */
- gen_neon_get_scalar(size, rm);
- gen_neon_movl_scratch_T0(0);
+ if (u && ((rd | rn) & 1)) {
+ return 1;
+ }
+ tmp = neon_get_scalar(size, rm);
+ neon_store_scratch(0, tmp);
for (pass = 0; pass < (u ? 4 : 2); pass++) {
- if (pass != 0)
- gen_neon_movl_T0_scratch(0);
- NEON_GET_REG(T1, rn, pass);
+ tmp = neon_load_scratch(0);
+ tmp2 = neon_load_reg(rn, pass);
if (op == 12) {
if (size == 1) {
- gen_helper_neon_qdmulh_s16(CPU_T0E01);
+ gen_helper_neon_qdmulh_s16(tmp, tmp, tmp2);
} else {
- gen_helper_neon_qdmulh_s32(CPU_T0E01);
+ gen_helper_neon_qdmulh_s32(tmp, tmp, tmp2);
}
} else if (op == 13) {
if (size == 1) {
- gen_helper_neon_qrdmulh_s16(CPU_T0E01);
+ gen_helper_neon_qrdmulh_s16(tmp, tmp, tmp2);
} else {
- gen_helper_neon_qrdmulh_s32(CPU_T0E01);
+ gen_helper_neon_qrdmulh_s32(tmp, tmp, tmp2);
}
} else if (op & 1) {
- gen_helper_neon_mul_f32(CPU_T001);
+ gen_helper_neon_mul_f32(tmp, tmp, tmp2);
} else {
switch (size) {
- case 0: gen_helper_neon_mul_u8(CPU_T001); break;
- case 1: gen_helper_neon_mul_u16(CPU_T001); break;
- case 2: gen_op_mul_T0_T1(); break;
- default: return 1;
+ case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
+ case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
+ default: abort();
}
}
+ tcg_temp_free_i32(tmp2);
if (op < 8) {
/* Accumulate. */
- NEON_GET_REG(T1, rd, pass);
+ tmp2 = neon_load_reg(rd, pass);
switch (op) {
case 0:
- gen_neon_add(size);
+ gen_neon_add(size, tmp, tmp2);
break;
case 1:
- gen_helper_neon_add_f32(CPU_T001);
+ gen_helper_neon_add_f32(tmp, tmp, tmp2);
break;
case 4:
- gen_neon_rsb(size);
+ gen_neon_rsb(size, tmp, tmp2);
break;
case 5:
- gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
+ gen_helper_neon_sub_f32(tmp, tmp2, tmp);
break;
default:
abort();
}
+ tcg_temp_free_i32(tmp2);
}
- NEON_SET_REG(T0, rd, pass);
+ neon_store_reg(rd, pass, tmp);
}
break;
- case 2: /* VMLAL scalar */
case 3: /* VQDMLAL scalar */
- case 6: /* VMLSL scalar */
case 7: /* VQDMLSL scalar */
- case 10: /* VMULL scalar */
case 11: /* VQDMULL scalar */
- if (size == 0 && (op == 3 || op == 7 || op == 11))
+ if (u == 1) {
return 1;
-
- gen_neon_get_scalar(size, rm);
- NEON_GET_REG(T1, rn, 1);
+ }
+ /* fall through */
+ case 2: /* VMLAL scalar */
+ case 6: /* VMLSL scalar */
+ case 10: /* VMULL scalar */
+ if (rd & 1) {
+ return 1;
+ }
+ tmp2 = neon_get_scalar(size, rm);
+ /* We need a copy of tmp2 because gen_neon_mull
+ * deletes it during pass 0. */
+ tmp4 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp4, tmp2);
+ tmp3 = neon_load_reg(rn, 1);
for (pass = 0; pass < 2; pass++) {
if (pass == 0) {
tmp = neon_load_reg(rn, 0);
} else {
- tmp = new_tmp();
- tcg_gen_mov_i32(tmp, cpu_T[1]);
+ tmp = tmp3;
+ tmp2 = tmp4;
}
- tmp2 = new_tmp();
- tcg_gen_mov_i32(tmp2, cpu_T[0]);
gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- if (op == 6 || op == 7) {
- gen_neon_negl(cpu_V0, size);
- }
if (op != 11) {
neon_load_reg64(cpu_V1, rd + pass);
}
switch (op) {
- case 2: case 6:
+ case 6:
+ gen_neon_negl(cpu_V0, size);
+ /* Fall through */
+ case 2:
gen_neon_addl(size);
break;
case 3: case 7:
gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
+ if (op == 7) {
+ gen_neon_negl(cpu_V0, size);
+ }
gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
break;
case 10:
@@ -5128,6 +5594,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
neon_store_reg64(cpu_V0, rd + pass);
}
+
+
break;
default: /* 14 and 15 are RESERVED */
return 1;
@@ -5137,11 +5605,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
if (!u) {
/* Extract. */
imm = (insn >> 8) & 0xf;
- count = q + 1;
if (imm > 7 && !q)
return 1;
+ if (q && ((rd | rn | rm) & 1)) {
+ return 1;
+ }
+
if (imm == 0) {
neon_load_reg64(cpu_V0, rn);
if (q) {
@@ -5173,6 +5644,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
+ tcg_temp_free_i64(tmp64);
} else {
/* BUGFIX */
neon_load_reg64(cpu_V0, rn);
@@ -5189,37 +5661,40 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
size = (insn >> 18) & 3;
+ /* UNDEF for unknown op values and bad op-size combinations */
+ if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
+ return 1;
+ }
+ if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
+ q && ((rm | rd) & 1)) {
+ return 1;
+ }
switch (op) {
- case 0: /* VREV64 */
- if (size == 3)
- return 1;
+ case NEON_2RM_VREV64:
for (pass = 0; pass < (q ? 2 : 1); pass++) {
- NEON_GET_REG(T0, rm, pass * 2);
- NEON_GET_REG(T1, rm, pass * 2 + 1);
+ tmp = neon_load_reg(rm, pass * 2);
+ tmp2 = neon_load_reg(rm, pass * 2 + 1);
switch (size) {
- case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
- case 1: gen_swap_half(cpu_T[0]); break;
+ case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
+ case 1: gen_swap_half(tmp); break;
case 2: /* no-op */ break;
default: abort();
}
- NEON_SET_REG(T0, rd, pass * 2 + 1);
+ neon_store_reg(rd, pass * 2 + 1, tmp);
if (size == 2) {
- NEON_SET_REG(T1, rd, pass * 2);
+ neon_store_reg(rd, pass * 2, tmp2);
} else {
- gen_op_movl_T0_T1();
switch (size) {
- case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
- case 1: gen_swap_half(cpu_T[0]); break;
+ case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
+ case 1: gen_swap_half(tmp2); break;
default: abort();
}
- NEON_SET_REG(T0, rd, pass * 2);
+ neon_store_reg(rd, pass * 2, tmp2);
}
}
break;
- case 4: case 5: /* VPADDL */
- case 12: case 13: /* VPADAL */
- if (size == 3)
- return 1;
+ case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
+ case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
for (pass = 0; pass < q + 1; pass++) {
tmp = neon_load_reg(rm, pass * 2);
gen_neon_widen(cpu_V0, tmp, size, op & 1);
@@ -5231,7 +5706,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 2: tcg_gen_add_i64(CPU_V001); break;
default: abort();
}
- if (op >= 12) {
+ if (op >= NEON_2RM_VPADAL) {
/* Accumulate. */
neon_load_reg64(cpu_V1, rd + pass);
gen_neon_addl(size);
@@ -5239,85 +5714,40 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
neon_store_reg64(cpu_V0, rd + pass);
}
break;
- case 33: /* VTRN */
+ case NEON_2RM_VTRN:
if (size == 2) {
+ int n;
for (n = 0; n < (q ? 4 : 2); n += 2) {
- NEON_GET_REG(T0, rm, n);
- NEON_GET_REG(T1, rd, n + 1);
- NEON_SET_REG(T1, rm, n);
- NEON_SET_REG(T0, rd, n + 1);
+ tmp = neon_load_reg(rm, n);
+ tmp2 = neon_load_reg(rd, n + 1);
+ neon_store_reg(rm, n, tmp2);
+ neon_store_reg(rd, n + 1, tmp);
}
} else {
goto elementwise;
}
break;
- case 34: /* VUZP */
- /* Reg Before After
- Rd A3 A2 A1 A0 B2 B0 A2 A0
- Rm B3 B2 B1 B0 B3 B1 A3 A1
- */
- if (size == 3)
+ case NEON_2RM_VUZP:
+ if (gen_neon_unzip(rd, rm, size, q)) {
return 1;
- gen_neon_unzip(rd, q, 0, size);
- gen_neon_unzip(rm, q, 4, size);
- if (q) {
- static int unzip_order_q[8] =
- {0, 2, 4, 6, 1, 3, 5, 7};
- for (n = 0; n < 8; n++) {
- int reg = (n < 4) ? rd : rm;
- gen_neon_movl_T0_scratch(unzip_order_q[n]);
- NEON_SET_REG(T0, reg, n % 4);
- }
- } else {
- static int unzip_order[4] =
- {0, 4, 1, 5};
- for (n = 0; n < 4; n++) {
- int reg = (n < 2) ? rd : rm;
- gen_neon_movl_T0_scratch(unzip_order[n]);
- NEON_SET_REG(T0, reg, n % 2);
- }
}
break;
- case 35: /* VZIP */
- /* Reg Before After
- Rd A3 A2 A1 A0 B1 A1 B0 A0
- Rm B3 B2 B1 B0 B3 A3 B2 A2
- */
- if (size == 3)
+ case NEON_2RM_VZIP:
+ if (gen_neon_zip(rd, rm, size, q)) {
return 1;
- count = (q ? 4 : 2);
- for (n = 0; n < count; n++) {
- NEON_GET_REG(T0, rd, n);
- NEON_GET_REG(T1, rd, n);
- switch (size) {
- case 0: gen_helper_neon_zip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break;
- case 2: /* no-op */; break;
- default: abort();
- }
- gen_neon_movl_scratch_T0(n * 2);
- gen_neon_movl_scratch_T1(n * 2 + 1);
- }
- for (n = 0; n < count * 2; n++) {
- int reg = (n < count) ? rd : rm;
- gen_neon_movl_T0_scratch(n);
- NEON_SET_REG(T0, reg, n % count);
}
break;
- case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
- if (size == 3)
+ case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
+ /* also VQMOVUN; op field and mnemonics don't line up */
+ if (rm & 1) {
return 1;
+ }
TCGV_UNUSED(tmp2);
for (pass = 0; pass < 2; pass++) {
neon_load_reg64(cpu_V0, rm + pass);
- tmp = new_tmp();
- if (op == 36 && q == 0) {
- gen_neon_narrow(size, tmp, cpu_V0);
- } else if (q) {
- gen_neon_narrow_satu(size, tmp, cpu_V0);
- } else {
- gen_neon_narrow_sats(size, tmp, cpu_V0);
- }
+ tmp = tcg_temp_new_i32();
+ gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
+ tmp, cpu_V0);
if (pass == 0) {
tmp2 = tmp;
} else {
@@ -5326,243 +5756,317 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
}
}
break;
- case 38: /* VSHLL */
- if (q || size == 3)
+ case NEON_2RM_VSHLL:
+ if (q || (rd & 1)) {
return 1;
+ }
tmp = neon_load_reg(rm, 0);
tmp2 = neon_load_reg(rm, 1);
for (pass = 0; pass < 2; pass++) {
if (pass == 1)
tmp = tmp2;
gen_neon_widen(cpu_V0, tmp, size, 1);
+ tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
neon_store_reg64(cpu_V0, rd + pass);
}
break;
+ case NEON_2RM_VCVT_F16_F32:
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
+ q || (rm & 1)) {
+ return 1;
+ }
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
+ gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
+ gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp2, tmp2, tmp);
+ tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
+ gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+ tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
+ neon_store_reg(rd, 0, tmp2);
+ tmp2 = tcg_temp_new_i32();
+ gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp2, tmp2, tmp);
+ neon_store_reg(rd, 1, tmp2);
+ tcg_temp_free_i32(tmp);
+ break;
+ case NEON_2RM_VCVT_F32_F16:
+ if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
+ q || (rd & 1)) {
+ return 1;
+ }
+ tmp3 = tcg_temp_new_i32();
+ tmp = neon_load_reg(rm, 0);
+ tmp2 = neon_load_reg(rm, 1);
+ tcg_gen_ext16u_i32(tmp3, tmp);
+ gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+ tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
+ tcg_gen_shri_i32(tmp3, tmp, 16);
+ gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+ tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
+ tcg_temp_free_i32(tmp);
+ tcg_gen_ext16u_i32(tmp3, tmp2);
+ gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+ tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
+ tcg_gen_shri_i32(tmp3, tmp2, 16);
+ gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+ tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ break;
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
- if (op == 30 || op == 31 || op >= 58) {
+ if (neon_2rm_is_float_op(op)) {
tcg_gen_ld_f32(cpu_F0s, cpu_env,
neon_reg_offset(rm, pass));
+ TCGV_UNUSED(tmp);
} else {
- NEON_GET_REG(T0, rm, pass);
+ tmp = neon_load_reg(rm, pass);
}
switch (op) {
- case 1: /* VREV32 */
+ case NEON_2RM_VREV32:
switch (size) {
- case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
- case 1: gen_swap_half(cpu_T[0]); break;
- default: return 1;
+ case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
+ case 1: gen_swap_half(tmp); break;
+ default: abort();
}
break;
- case 2: /* VREV16 */
- if (size != 0)
- return 1;
- gen_rev16(cpu_T[0]);
+ case NEON_2RM_VREV16:
+ gen_rev16(tmp);
break;
- case 8: /* CLS */
+ case NEON_2RM_VCLS:
switch (size) {
- case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;
- case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;
- case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break;
- default: return 1;
+ case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
+ case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
+ case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
+ default: abort();
}
break;
- case 9: /* CLZ */
+ case NEON_2RM_VCLZ:
switch (size) {
- case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;
- case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;
- case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break;
- default: return 1;
+ case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
+ case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
+ case 2: gen_helper_clz(tmp, tmp); break;
+ default: abort();
}
break;
- case 10: /* CNT */
- if (size != 0)
- return 1;
- gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]);
+ case NEON_2RM_VCNT:
+ gen_helper_neon_cnt_u8(tmp, tmp);
break;
- case 11: /* VNOT */
- if (size != 0)
- return 1;
- gen_op_notl_T0();
+ case NEON_2RM_VMVN:
+ tcg_gen_not_i32(tmp, tmp);
break;
- case 14: /* VQABS */
+ case NEON_2RM_VQABS:
switch (size) {
- case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
- case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
- case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
- default: return 1;
+ case 0: gen_helper_neon_qabs_s8(tmp, tmp); break;
+ case 1: gen_helper_neon_qabs_s16(tmp, tmp); break;
+ case 2: gen_helper_neon_qabs_s32(tmp, tmp); break;
+ default: abort();
}
break;
- case 15: /* VQNEG */
+ case NEON_2RM_VQNEG:
switch (size) {
- case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
- case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
- case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
- default: return 1;
+ case 0: gen_helper_neon_qneg_s8(tmp, tmp); break;
+ case 1: gen_helper_neon_qneg_s16(tmp, tmp); break;
+ case 2: gen_helper_neon_qneg_s32(tmp, tmp); break;
+ default: abort();
}
break;
- case 16: case 19: /* VCGT #0, VCLE #0 */
- gen_op_movl_T1_im(0);
+ case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
+ tmp2 = tcg_const_i32(0);
switch(size) {
- case 0: gen_helper_neon_cgt_s8(CPU_T001); break;
- case 1: gen_helper_neon_cgt_s16(CPU_T001); break;
- case 2: gen_helper_neon_cgt_s32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
+ default: abort();
+ }
+ tcg_temp_free(tmp2);
+ if (op == NEON_2RM_VCLE0) {
+ tcg_gen_not_i32(tmp, tmp);
}
- if (op == 19)
- gen_op_notl_T0();
break;
- case 17: case 20: /* VCGE #0, VCLT #0 */
- gen_op_movl_T1_im(0);
+ case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
+ tmp2 = tcg_const_i32(0);
switch(size) {
- case 0: gen_helper_neon_cge_s8(CPU_T001); break;
- case 1: gen_helper_neon_cge_s16(CPU_T001); break;
- case 2: gen_helper_neon_cge_s32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
+ default: abort();
+ }
+ tcg_temp_free(tmp2);
+ if (op == NEON_2RM_VCLT0) {
+ tcg_gen_not_i32(tmp, tmp);
}
- if (op == 20)
- gen_op_notl_T0();
break;
- case 18: /* VCEQ #0 */
- gen_op_movl_T1_im(0);
+ case NEON_2RM_VCEQ0:
+ tmp2 = tcg_const_i32(0);
switch(size) {
- case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
- case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
- case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
- default: return 1;
+ case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
+ case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
+ case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
+ default: abort();
}
+ tcg_temp_free(tmp2);
break;
- case 22: /* VABS */
+ case NEON_2RM_VABS:
switch(size) {
- case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;
- case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;
- case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break;
- default: return 1;
+ case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
+ case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
+ case 2: tcg_gen_abs_i32(tmp, tmp); break;
+ default: abort();
}
break;
- case 23: /* VNEG */
- gen_op_movl_T1_im(0);
- if (size == 3)
- return 1;
- gen_neon_rsb(size);
+ case NEON_2RM_VNEG:
+ tmp2 = tcg_const_i32(0);
+ gen_neon_rsb(size, tmp, tmp2);
+ tcg_temp_free(tmp2);
+ break;
+ case NEON_2RM_VCGT0_F:
+ tmp2 = tcg_const_i32(0);
+ gen_helper_neon_cgt_f32(tmp, tmp, tmp2);
+ tcg_temp_free(tmp2);
+ break;
+ case NEON_2RM_VCGE0_F:
+ tmp2 = tcg_const_i32(0);
+ gen_helper_neon_cge_f32(tmp, tmp, tmp2);
+ tcg_temp_free(tmp2);
break;
- case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
- gen_op_movl_T1_im(0);
- gen_helper_neon_cgt_f32(CPU_T001);
- if (op == 27)
- gen_op_notl_T0();
+ case NEON_2RM_VCEQ0_F:
+ tmp2 = tcg_const_i32(0);
+ gen_helper_neon_ceq_f32(tmp, tmp, tmp2);
+ tcg_temp_free(tmp2);
break;
- case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
- gen_op_movl_T1_im(0);
- gen_helper_neon_cge_f32(CPU_T001);
- if (op == 28)
- gen_op_notl_T0();
+ case NEON_2RM_VCLE0_F:
+ tmp2 = tcg_const_i32(0);
+ gen_helper_neon_cge_f32(tmp, tmp2, tmp);
+ tcg_temp_free(tmp2);
break;
- case 26: /* Float VCEQ #0 */
- gen_op_movl_T1_im(0);
- gen_helper_neon_ceq_f32(CPU_T001);
+ case NEON_2RM_VCLT0_F:
+ tmp2 = tcg_const_i32(0);
+ gen_helper_neon_cgt_f32(tmp, tmp2, tmp);
+ tcg_temp_free(tmp2);
break;
- case 30: /* Float VABS */
+ case NEON_2RM_VABS_F:
gen_vfp_abs(0);
break;
- case 31: /* Float VNEG */
+ case NEON_2RM_VNEG_F:
gen_vfp_neg(0);
break;
- case 32: /* VSWP */
- NEON_GET_REG(T1, rd, pass);
- NEON_SET_REG(T1, rm, pass);
+ case NEON_2RM_VSWP:
+ tmp2 = neon_load_reg(rd, pass);
+ neon_store_reg(rm, pass, tmp2);
break;
- case 33: /* VTRN */
- NEON_GET_REG(T1, rd, pass);
+ case NEON_2RM_VTRN:
+ tmp2 = neon_load_reg(rd, pass);
switch (size) {
- case 0: gen_helper_neon_trn_u8(); break;
- case 1: gen_helper_neon_trn_u16(); break;
- case 2: abort();
- default: return 1;
+ case 0: gen_neon_trn_u8(tmp, tmp2); break;
+ case 1: gen_neon_trn_u16(tmp, tmp2); break;
+ default: abort();
}
- NEON_SET_REG(T1, rm, pass);
+ neon_store_reg(rm, pass, tmp2);
break;
- case 56: /* Integer VRECPE */
- gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env);
+ case NEON_2RM_VRECPE:
+ gen_helper_recpe_u32(tmp, tmp, cpu_env);
break;
- case 57: /* Integer VRSQRTE */
- gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env);
+ case NEON_2RM_VRSQRTE:
+ gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
break;
- case 58: /* Float VRECPE */
+ case NEON_2RM_VRECPE_F:
gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
break;
- case 59: /* Float VRSQRTE */
+ case NEON_2RM_VRSQRTE_F:
gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
break;
- case 60: /* VCVT.F32.S32 */
- gen_vfp_tosiz(0);
+ case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
+ gen_vfp_sito(0, 1);
break;
- case 61: /* VCVT.F32.U32 */
- gen_vfp_touiz(0);
+ case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
+ gen_vfp_uito(0, 1);
break;
- case 62: /* VCVT.S32.F32 */
- gen_vfp_sito(0);
+ case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
+ gen_vfp_tosiz(0, 1);
break;
- case 63: /* VCVT.U32.F32 */
- gen_vfp_uito(0);
+ case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
+ gen_vfp_touiz(0, 1);
break;
default:
- /* Reserved: 21, 29, 39-56 */
- return 1;
+ /* Reserved op values were caught by the
+ * neon_2rm_sizes[] check earlier.
+ */
+ abort();
}
- if (op == 30 || op == 31 || op >= 58) {
+ if (neon_2rm_is_float_op(op)) {
tcg_gen_st_f32(cpu_F0s, cpu_env,
neon_reg_offset(rd, pass));
} else {
- NEON_SET_REG(T0, rd, pass);
+ neon_store_reg(rd, pass, tmp);
}
}
break;
}
} else if ((insn & (1 << 10)) == 0) {
/* VTBL, VTBX. */
- n = ((insn >> 5) & 0x18) + 8;
+ int n = ((insn >> 8) & 3) + 1;
+ if ((rn + n) > 32) {
+ /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
+ * helper function running off the end of the register file.
+ */
+ return 1;
+ }
+ n <<= 3;
if (insn & (1 << 6)) {
tmp = neon_load_reg(rd, 0);
} else {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
tmp2 = neon_load_reg(rm, 0);
- gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
- tcg_const_i32(n));
- dead_tmp(tmp);
+ tmp4 = tcg_const_i32(rn);
+ tmp5 = tcg_const_i32(n);
+ gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
+ tcg_temp_free_i32(tmp);
if (insn & (1 << 6)) {
tmp = neon_load_reg(rd, 1);
} else {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
}
tmp3 = neon_load_reg(rm, 1);
- gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
- tcg_const_i32(n));
+ gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
+ tcg_temp_free_i32(tmp5);
+ tcg_temp_free_i32(tmp4);
neon_store_reg(rd, 0, tmp2);
neon_store_reg(rd, 1, tmp3);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
} else if ((insn & 0x380) == 0) {
/* VDUP */
+ if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
+ return 1;
+ }
if (insn & (1 << 19)) {
- NEON_SET_REG(T0, rm, 1);
+ tmp = neon_load_reg(rm, 1);
} else {
- NEON_SET_REG(T0, rm, 0);
+ tmp = neon_load_reg(rm, 0);
}
if (insn & (1 << 16)) {
- gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8);
+ gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
} else if (insn & (1 << 17)) {
if ((insn >> 18) & 1)
- gen_neon_dup_high16(cpu_T[0]);
+ gen_neon_dup_high16(tmp);
else
- gen_neon_dup_low16(cpu_T[0]);
+ gen_neon_dup_low16(tmp);
}
for (pass = 0; pass < (q ? 4 : 2); pass++) {
- NEON_SET_REG(T0, rd, pass);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp2, tmp);
+ neon_store_reg(rd, pass, tmp2);
}
+ tcg_temp_free_i32(tmp);
} else {
return 1;
}
@@ -5580,6 +6084,34 @@ static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
int rt = (insn >> 12) & 0xf;
TCGv tmp;
+ /* Minimal set of debug registers, since we don't support debug */
+ if (op1 == 0 && crn == 0 && op2 == 0) {
+ switch (crm) {
+ case 0:
+ /* DBGDIDR: just RAZ. In particular this means the
+ * "debug architecture version" bits will read as
+ * a reserved value, which should cause Linux to
+ * not try to use the debug hardware.
+ */
+ tmp = tcg_const_i32(0);
+ store_reg(s, rt, tmp);
+ return 0;
+ case 1:
+ case 2:
+ /* DBGDRAR and DBGDSAR: v7 only. Always RAZ since we
+ * don't implement memory mapped debug components
+ */
+ if (ENABLE_ARCH_7) {
+ tmp = tcg_const_i32(0);
+ store_reg(s, rt, tmp);
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
/* TEECR */
@@ -5612,6 +6144,30 @@ static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
int rt = (insn >> 12) & 0xf;
TCGv tmp;
+ /* Minimal set of debug registers, since we don't support debug */
+ if (op1 == 0 && crn == 0 && op2 == 0) {
+ switch (crm) {
+ case 0:
+ /* DBGDIDR */
+ tmp = load_cpu_field(cp14_dbgdidr);
+ store_reg(s, rt, tmp);
+ return 0;
+ case 1:
+ case 2:
+ /* DBGDRAR and DBGDSAR: v7 only. Always RAZ since we
+ * don't implement memory mapped debug components
+ */
+ if (ENABLE_ARCH_7) {
+ tmp = tcg_const_i32(0);
+ store_reg(s, rt, tmp);
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
/* TEECR */
@@ -5619,7 +6175,7 @@ static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
return 1;
tmp = load_reg(s, rt);
gen_helper_set_teecr(cpu_env, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
return 0;
}
if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
@@ -5653,7 +6209,7 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
return disas_dsp_insn(env, s, insn);
}
- return 1;
+ goto board;
case 10:
case 11:
return disas_vfp_insn (env, s, insn);
@@ -5680,10 +6236,10 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
{
TCGv tmp;
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_trunc_i64_i32(tmp, val);
store_reg(s, rlow, tmp);
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_shri_i64(val, val, 32);
tcg_gen_trunc_i64_i32(tmp, val);
store_reg(s, rhigh, tmp);
@@ -5699,8 +6255,9 @@ static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
tmp = tcg_temp_new_i64();
tmp2 = load_reg(s, rlow);
tcg_gen_extu_i32_i64(tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tcg_gen_add_i64(val, val, tmp);
+ tcg_temp_free_i64(tmp);
}
/* load and add a 64-bit value from a register pair. */
@@ -5715,20 +6272,145 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
tmph = load_reg(s, rhigh);
tmp = tcg_temp_new_i64();
tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
- dead_tmp(tmpl);
- dead_tmp(tmph);
+ tcg_temp_free_i32(tmpl);
+ tcg_temp_free_i32(tmph);
tcg_gen_add_i64(val, val, tmp);
+ tcg_temp_free_i64(tmp);
}
/* Set N and Z flags from a 64-bit value. */
static void gen_logicq_cc(TCGv_i64 val)
{
- TCGv tmp = new_tmp();
+ TCGv tmp = tcg_temp_new_i32();
gen_helper_logicq_cc(tmp, val);
gen_logic_CC(tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
+}
+
+/* Load/Store exclusive instructions are implemented by remembering
+ the value/address loaded, and seeing if these are the same
+ when the store is performed. This should be sufficient to implement
+ the architecturally mandated semantics, and avoids having to monitor
+ regular stores.
+
+ In system emulation mode only one CPU will be running at once, so
+ this sequence is effectively atomic. In user emulation mode we
+ throw an exception and handle the atomic operation elsewhere. */
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+ TCGv addr, int size)
+{
+ TCGv tmp;
+
+ switch (size) {
+ case 0:
+ tmp = gen_ld8u(addr, IS_USER(s));
+ break;
+ case 1:
+ tmp = gen_ld16u(addr, IS_USER(s));
+ break;
+ case 2:
+ case 3:
+ tmp = gen_ld32(addr, IS_USER(s));
+ break;
+ default:
+ abort();
+ }
+ tcg_gen_mov_i32(cpu_exclusive_val, tmp);
+ store_reg(s, rt, tmp);
+ if (size == 3) {
+ TCGv tmp2 = tcg_temp_new_i32();
+ tcg_gen_addi_i32(tmp2, addr, 4);
+ tmp = gen_ld32(tmp2, IS_USER(s));
+ tcg_temp_free_i32(tmp2);
+ tcg_gen_mov_i32(cpu_exclusive_high, tmp);
+ store_reg(s, rt2, tmp);
+ }
+ tcg_gen_mov_i32(cpu_exclusive_addr, addr);
+}
+
+static void gen_clrex(DisasContext *s)
+{
+ tcg_gen_movi_i32(cpu_exclusive_addr, -1);
}
+#ifdef CONFIG_USER_ONLY
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+ TCGv addr, int size)
+{
+ tcg_gen_mov_i32(cpu_exclusive_test, addr);
+ tcg_gen_movi_i32(cpu_exclusive_info,
+ size | (rd << 4) | (rt << 8) | (rt2 << 12));
+ gen_exception_insn(s, 4, EXCP_STREX);
+}
+#else
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+ TCGv addr, int size)
+{
+ TCGv tmp;
+ int done_label;
+ int fail_label;
+
+ /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
+ [addr] = {Rt};
+ {Rd} = 0;
+ } else {
+ {Rd} = 1;
+ } */
+ fail_label = gen_new_label();
+ done_label = gen_new_label();
+ tcg_gen_brcond_i32(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+ switch (size) {
+ case 0:
+ tmp = gen_ld8u(addr, IS_USER(s));
+ break;
+ case 1:
+ tmp = gen_ld16u(addr, IS_USER(s));
+ break;
+ case 2:
+ case 3:
+ tmp = gen_ld32(addr, IS_USER(s));
+ break;
+ default:
+ abort();
+ }
+ tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
+ tcg_temp_free_i32(tmp);
+ if (size == 3) {
+ TCGv tmp2 = tcg_temp_new_i32();
+ tcg_gen_addi_i32(tmp2, addr, 4);
+ tmp = gen_ld32(tmp2, IS_USER(s));
+ tcg_temp_free_i32(tmp2);
+ tcg_gen_brcond_i32(TCG_COND_NE, tmp, cpu_exclusive_high, fail_label);
+ tcg_temp_free_i32(tmp);
+ }
+ tmp = load_reg(s, rt);
+ switch (size) {
+ case 0:
+ gen_st8(tmp, addr, IS_USER(s));
+ break;
+ case 1:
+ gen_st16(tmp, addr, IS_USER(s));
+ break;
+ case 2:
+ case 3:
+ gen_st32(tmp, addr, IS_USER(s));
+ break;
+ default:
+ abort();
+ }
+ if (size == 3) {
+ tcg_gen_addi_i32(addr, addr, 4);
+ tmp = load_reg(s, rt2);
+ gen_st32(tmp, addr, IS_USER(s));
+ }
+ tcg_gen_movi_i32(cpu_R[rd], 0);
+ tcg_gen_br(done_label);
+ gen_set_label(fail_label);
+ tcg_gen_movi_i32(cpu_R[rd], 1);
+ gen_set_label(done_label);
+ tcg_gen_movi_i32(cpu_exclusive_addr, -1);
+}
+#endif
static void disas_arm_insn(CPUState * env, DisasContext *s)
{
@@ -5739,6 +6421,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
TCGv tmp3;
TCGv addr;
TCGv_i64 tmp64;
+
insn = ldl_code(s->pc);
ANDROID_WATCH_CALLSTACK_ARM(s);
@@ -5752,6 +6435,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
goto illegal_op;
cond = insn >> 28;
if (cond == 0xf){
+ /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
+ * choose to UNDEF. In ARMv5 and above the space is used
+ * for miscellaneous unconditional instructions.
+ */
+ ARCH(5);
+
ANDROID_TRACE_GEN_TICKS();
/* Unconditional instructions. */
if (((insn >> 25) & 7) == 1) {
@@ -5772,9 +6461,32 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
goto illegal_op;
return;
}
- if ((insn & 0x0d70f000) == 0x0550f000)
- return; /* PLD */
- else if ((insn & 0x0ffffdff) == 0x01010000) {
+ if (((insn & 0x0f30f000) == 0x0510f000) ||
+ ((insn & 0x0f30f010) == 0x0710f000)) {
+ if ((insn & (1 << 22)) == 0) {
+ /* PLDW; v7MP */
+ if (!arm_feature(env, ARM_FEATURE_V7MP)) {
+ goto illegal_op;
+ }
+ }
+ /* Otherwise PLD; v5TE+ */
+ ARCH(5TE);
+ return;
+ }
+ if (((insn & 0x0f70f000) == 0x0450f000) ||
+ ((insn & 0x0f70f010) == 0x0650f000)) {
+ ARCH(7);
+ return; /* PLI; V7 */
+ }
+ if (((insn & 0x0f700000) == 0x04100000) ||
+ ((insn & 0x0f700010) == 0x06100000)) {
+ if (!arm_feature(env, ARM_FEATURE_V7MP)) {
+ goto illegal_op;
+ }
+ return; /* v7MP: Unallocated memory hint: must NOP */
+ }
+
+ if ((insn & 0x0ffffdff) == 0x01010000) {
ARCH(6);
/* setend */
if (insn & (1 << 9)) {
@@ -5786,7 +6498,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
switch ((insn >> 4) & 0xf) {
case 1: /* clrex */
ARCH(6K);
- gen_helper_clrex(cpu_env);
+ gen_clrex(s);
return;
case 4: /* dsb */
case 5: /* dmb */
@@ -5799,22 +6511,20 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
} else if ((insn & 0x0e5fffe0) == 0x084d0500) {
/* srs */
- uint32_t offset;
+ int32_t offset;
if (IS_USER(s))
goto illegal_op;
ARCH(6);
op1 = (insn & 0x1f);
- if (op1 == (env->uncached_cpsr & CPSR_M)) {
- addr = load_reg(s, 13);
- } else {
- addr = new_tmp();
- gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op1));
- }
+ addr = tcg_temp_new_i32();
+ tmp = tcg_const_i32(op1);
+ gen_helper_get_r13_banked(addr, cpu_env, tmp);
+ tcg_temp_free_i32(tmp);
i = (insn >> 23) & 3;
switch (i) {
case 0: offset = -4; break; /* DA */
- case 1: offset = -8; break; /* DB */
- case 2: offset = 0; break; /* IA */
+ case 1: offset = 0; break; /* IA */
+ case 2: offset = -8; break; /* DB */
case 3: offset = 4; break; /* IB */
default: abort();
}
@@ -5822,32 +6532,31 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_addi_i32(addr, addr, offset);
tmp = load_reg(s, 14);
gen_st32(tmp, addr, 0);
- tmp = new_tmp();
- gen_helper_cpsr_read(tmp);
+ tmp = load_cpu_field(spsr);
tcg_gen_addi_i32(addr, addr, 4);
gen_st32(tmp, addr, 0);
if (insn & (1 << 21)) {
/* Base writeback. */
switch (i) {
case 0: offset = -8; break;
- case 1: offset = -4; break;
- case 2: offset = 4; break;
+ case 1: offset = 4; break;
+ case 2: offset = -4; break;
case 3: offset = 0; break;
default: abort();
}
if (offset)
- tcg_gen_addi_i32(addr, tmp, offset);
- if (op1 == (env->uncached_cpsr & CPSR_M)) {
- gen_movl_reg_T1(s, 13);
- } else {
- gen_helper_set_r13_banked(cpu_env, tcg_const_i32(op1), cpu_T[1]);
- }
+ tcg_gen_addi_i32(addr, addr, offset);
+ tmp = tcg_const_i32(op1);
+ gen_helper_set_r13_banked(cpu_env, tmp, addr);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
- } else if ((insn & 0x0e5fffe0) == 0x081d0a00) {
+ return;
+ } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
/* rfe */
- uint32_t offset;
+ int32_t offset;
if (IS_USER(s))
goto illegal_op;
ARCH(6);
@@ -5856,8 +6565,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
i = (insn >> 23) & 3;
switch (i) {
case 0: offset = -4; break; /* DA */
- case 1: offset = -8; break; /* DB */
- case 2: offset = 0; break; /* IA */
+ case 1: offset = 0; break; /* IA */
+ case 2: offset = -8; break; /* DB */
case 3: offset = 4; break; /* IB */
default: abort();
}
@@ -5871,8 +6580,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
/* Base writeback. */
switch (i) {
case 0: offset = -8; break;
- case 1: offset = -4; break;
- case 2: offset = 4; break;
+ case 1: offset = 4; break;
+ case 2: offset = -4; break;
case 3: offset = 0; break;
default: abort();
}
@@ -5880,15 +6589,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_addi_i32(addr, addr, offset);
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
gen_rfe(s, tmp, tmp2);
+ return;
} else if ((insn & 0x0e000000) == 0x0a000000) {
/* branch link and change to thumb (blx <offset>) */
int32_t offset;
val = (uint32_t)s->pc;
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, val);
store_reg(s, 14, tmp);
/* Sign-extend the 24-bit offset */
@@ -5897,6 +6607,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
val += (offset << 2) | ((insn >> 23) & 2) | 1;
/* pipeline offset */
val += 4;
+ /* protected by ARCH(5); above, near the start of uncond block */
gen_bx_im(s, val);
return;
} else if ((insn & 0x0e000f00) == 0x0c000100) {
@@ -5908,8 +6619,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
} else if ((insn & 0x0fe00000) == 0x0c400000) {
/* Coprocessor double register transfer. */
+ ARCH(5TE);
} else if ((insn & 0x0f000010) == 0x0e000010) {
/* Additional coprocessor register transfer. */
+ if (!disas_coproc_insn(env, s, insn)) {
+ return;
+ }
} else if ((insn & 0x0ff10020) == 0x01000000) {
uint32_t mask;
uint32_t val;
@@ -5932,8 +6647,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
val |= (insn & 0x1f);
}
if (mask) {
- gen_op_movl_T0_im(val);
- gen_set_psr_T0(s, mask, 0);
+ gen_set_psr_im(s, mask, 0, val);
}
return;
}
@@ -5955,7 +6669,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
if ((insn & (1 << 22)) == 0) {
/* MOVW */
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, val);
} else {
/* MOVT */
@@ -5975,9 +6689,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
shift = ((insn >> 8) & 0xf) * 2;
if (shift)
val = (val >> shift) | (val << (32 - shift));
- gen_op_movl_T0_im(val);
i = ((insn & (1 << 22)) != 0);
- if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
+ if (gen_set_psr_im(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, val))
goto illegal_op;
}
}
@@ -5991,9 +6704,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
case 0x0: /* move program status register */
if (op1 & 1) {
/* PSR = reg */
- gen_movl_T0_reg(s, rm);
+ tmp = load_reg(s, rm);
i = ((op1 & 2) != 0);
- if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
+ if (gen_set_psr(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i, tmp))
goto illegal_op;
} else {
/* reg = PSR */
@@ -6003,7 +6716,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
goto illegal_op;
tmp = load_cpu_field(spsr);
} else {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
gen_helper_cpsr_read(tmp);
}
store_reg(s, rd, tmp);
@@ -6012,10 +6725,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
case 0x1:
if (op1 == 1) {
/* branch/exchange thumb (bx). */
+ ARCH(4T);
tmp = load_reg(s, rm);
gen_bx(s, tmp);
} else if (op1 == 3) {
/* clz */
+ ARCH(5);
rd = (insn >> 12) & 0xf;
tmp = load_reg(s, rm);
gen_helper_clz(tmp, tmp);
@@ -6038,14 +6753,16 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if (op1 != 1)
goto illegal_op;
+ ARCH(5);
/* branch link/exchange thumb (blx) */
tmp = load_reg(s, rm);
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, s->pc);
store_reg(s, 14, tmp2);
gen_bx(s, tmp);
break;
case 0x5: /* saturating add/subtract */
+ ARCH(5TE);
rd = (insn >> 12) & 0xf;
rn = (insn >> 16) & 0xf;
tmp = load_reg(s, rm);
@@ -6056,19 +6773,29 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
gen_helper_sub_saturate(tmp, tmp, tmp2);
else
gen_helper_add_saturate(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
break;
- case 7: /* bkpt */
- gen_set_condexec(s);
- gen_set_pc_im(s->pc - 4);
- gen_exception(EXCP_BKPT);
- s->is_jmp = DISAS_JUMP;
+ case 7:
+ if (op1 == 1) {
+ /* bkpt */
+ ARCH(5);
+ gen_exception_insn(s, 4, EXCP_BKPT);
+ } else if (op1 == 3) {
+ /* smi/smc */
+ if (!(env->cp15.c0_c2[4] & 0xf000) || IS_USER(s)) {
+ goto illegal_op;
+ }
+ gen_smc(env, s);
+ } else {
+ goto illegal_op;
+ }
break;
case 0x8: /* signed multiply */
case 0xa:
case 0xc:
case 0xe:
+ ARCH(5TE);
rs = (insn >> 8) & 0xf;
rn = (insn >> 12) & 0xf;
rd = (insn >> 16) & 0xf;
@@ -6082,12 +6809,13 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
gen_sxth(tmp2);
tmp64 = gen_muls_i64_i32(tmp, tmp2);
tcg_gen_shri_i64(tmp64, tmp64, 16);
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_trunc_i64_i32(tmp, tmp64);
+ tcg_temp_free_i64(tmp64);
if ((sh & 2) == 0) {
tmp2 = load_reg(s, rn);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
store_reg(s, rd, tmp);
} else {
@@ -6095,18 +6823,19 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tmp = load_reg(s, rm);
tmp2 = load_reg(s, rs);
gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (op1 == 2) {
tmp64 = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(tmp64, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
gen_addq(s, tmp64, rn, rd);
gen_storeq_reg(s, rn, rd, tmp64);
+ tcg_temp_free_i64(tmp64);
} else {
if (op1 == 0) {
tmp2 = load_reg(s, rn);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
store_reg(s, rd, tmp);
}
@@ -6132,7 +6861,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if (shift) {
val = (val >> shift) | (val << (32 - shift));
}
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, val);
if (logic_cc && shift) {
gen_set_CF_bit31(tmp2);
@@ -6235,26 +6964,26 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_and_i32(tmp, tmp, tmp2);
gen_logic_CC(tmp);
}
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
break;
case 0x09:
if (set_cc) {
tcg_gen_xor_i32(tmp, tmp, tmp2);
gen_logic_CC(tmp);
}
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
break;
case 0x0a:
if (set_cc) {
gen_helper_sub_cc(tmp, tmp, tmp2);
}
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
break;
case 0x0b:
if (set_cc) {
gen_helper_add_cc(tmp, tmp, tmp2);
}
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
break;
case 0x0c:
tcg_gen_or_i32(tmp, tmp, tmp2);
@@ -6278,7 +7007,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
break;
case 0x0e:
- tcg_gen_bic_i32(tmp, tmp, tmp2);
+ tcg_gen_andc_i32(tmp, tmp, tmp2);
if (logic_cc) {
gen_logic_CC(tmp);
}
@@ -6294,7 +7023,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
break;
}
if (op1 != 0x0f && op1 != 0x0d) {
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
} else {
/* other instructions */
@@ -6317,42 +7046,55 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tmp = load_reg(s, rs);
tmp2 = load_reg(s, rm);
tcg_gen_mul_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (insn & (1 << 22)) {
/* Subtract (mls) */
ARCH(6T2);
tmp2 = load_reg(s, rn);
tcg_gen_sub_i32(tmp, tmp2, tmp);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
} else if (insn & (1 << 21)) {
/* Add */
tmp2 = load_reg(s, rn);
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
if (insn & (1 << 20))
gen_logic_CC(tmp);
store_reg(s, rd, tmp);
break;
- default:
- /* 64 bit mul */
+ case 4:
+ /* 64 bit mul double accumulate (UMAAL) */
+ ARCH(6);
tmp = load_reg(s, rs);
tmp2 = load_reg(s, rm);
- if (insn & (1 << 22))
+ tmp64 = gen_mulu_i64_i32(tmp, tmp2);
+ gen_addq_lo(s, tmp64, rn);
+ gen_addq_lo(s, tmp64, rd);
+ gen_storeq_reg(s, rn, rd, tmp64);
+ tcg_temp_free_i64(tmp64);
+ break;
+ case 8: case 9: case 10: case 11:
+ case 12: case 13: case 14: case 15:
+ /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
+ tmp = load_reg(s, rs);
+ tmp2 = load_reg(s, rm);
+ if (insn & (1 << 22)) {
tmp64 = gen_muls_i64_i32(tmp, tmp2);
- else
+ } else {
tmp64 = gen_mulu_i64_i32(tmp, tmp2);
- if (insn & (1 << 21)) /* mult accumulate */
+ }
+ if (insn & (1 << 21)) { /* mult accumulate */
gen_addq(s, tmp64, rn, rd);
- if (!(insn & (1 << 23))) { /* double accumulate */
- ARCH(6);
- gen_addq_lo(s, tmp64, rn);
- gen_addq_lo(s, tmp64, rd);
}
- if (insn & (1 << 20))
+ if (insn & (1 << 20)) {
gen_logicq_cc(tmp64);
+ }
gen_storeq_reg(s, rn, rd, tmp64);
+ tcg_temp_free_i64(tmp64);
break;
+ default:
+ goto illegal_op;
}
} else {
rn = (insn >> 16) & 0xf;
@@ -6364,60 +7106,45 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
ARCH(6K);
else
ARCH(6);
- gen_movl_T1_reg(s, rn);
- addr = cpu_T[1];
+ addr = tcg_temp_local_new_i32();
+ load_reg_var(s, addr, rn);
if (insn & (1 << 20)) {
- gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
switch (op1) {
case 0: /* ldrex */
- tmp = gen_ld32(addr, IS_USER(s));
+ gen_load_exclusive(s, rd, 15, addr, 2);
break;
case 1: /* ldrexd */
- tmp = gen_ld32(addr, IS_USER(s));
- store_reg(s, rd, tmp);
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = gen_ld32(addr, IS_USER(s));
- rd++;
+ gen_load_exclusive(s, rd, rd + 1, addr, 3);
break;
case 2: /* ldrexb */
- tmp = gen_ld8u(addr, IS_USER(s));
+ gen_load_exclusive(s, rd, 15, addr, 0);
break;
case 3: /* ldrexh */
- tmp = gen_ld16u(addr, IS_USER(s));
+ gen_load_exclusive(s, rd, 15, addr, 1);
break;
default:
abort();
}
- store_reg(s, rd, tmp);
} else {
- int label = gen_new_label();
rm = insn & 0xf;
- gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
- tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
- 0, label);
- tmp = load_reg(s,rm);
switch (op1) {
case 0: /* strex */
- gen_st32(tmp, addr, IS_USER(s));
+ gen_store_exclusive(s, rd, rm, 15, addr, 2);
break;
case 1: /* strexd */
- gen_st32(tmp, addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = load_reg(s, rm + 1);
- gen_st32(tmp, addr, IS_USER(s));
+ gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
break;
case 2: /* strexb */
- gen_st8(tmp, addr, IS_USER(s));
+ gen_store_exclusive(s, rd, rm, 15, addr, 0);
break;
case 3: /* strexh */
- gen_st16(tmp, addr, IS_USER(s));
+ gen_store_exclusive(s, rd, rm, 15, addr, 1);
break;
default:
abort();
}
- gen_set_label(label);
- gen_movl_reg_T0(s, rd);
}
+ tcg_temp_free(addr);
} else {
/* SWP instruction */
rm = (insn) & 0xf;
@@ -6434,7 +7161,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tmp2 = gen_ld32(addr, IS_USER(s));
gen_st32(tmp, addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
store_reg(s, rd, tmp2);
}
}
@@ -6464,6 +7191,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
load = 1;
} else if (sh & 2) {
+ ARCH(5TE);
/* doubleword */
if (sh & 1) {
/* store */
@@ -6501,7 +7229,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_addi_i32(addr, addr, address_offset);
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
if (load) {
/* Complete the load. */
@@ -6530,7 +7258,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if ((op1 & 3) == 0 || sh == 5 || sh == 6)
goto illegal_op;
gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
break;
case 1:
@@ -6554,7 +7282,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
}
tcg_gen_or_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
} else if ((insn & 0x00200020) == 0x00200000) {
/* [us]sat */
@@ -6568,41 +7296,41 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tcg_gen_shli_i32(tmp, tmp, shift);
}
sh = (insn >> 16) & 0x1f;
- if (sh != 0) {
- if (insn & (1 << 22))
- gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
- else
- gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
- }
+ tmp2 = tcg_const_i32(sh);
+ if (insn & (1 << 22))
+ gen_helper_usat(tmp, tmp, tmp2);
+ else
+ gen_helper_ssat(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
} else if ((insn & 0x00300fe0) == 0x00200f20) {
/* [us]sat16 */
tmp = load_reg(s, rm);
sh = (insn >> 16) & 0x1f;
- if (sh != 0) {
- if (insn & (1 << 22))
- gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
- else
- gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
- }
+ tmp2 = tcg_const_i32(sh);
+ if (insn & (1 << 22))
+ gen_helper_usat16(tmp, tmp, tmp2);
+ else
+ gen_helper_ssat16(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
} else if ((insn & 0x00700fe0) == 0x00000fa0) {
/* Select bytes. */
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
- tmp3 = new_tmp();
+ tmp3 = tcg_temp_new_i32();
tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
- dead_tmp(tmp3);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp3);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
} else if ((insn & 0x000003e0) == 0x00000060) {
tmp = load_reg(s, rm);
shift = (insn >> 10) & 3;
- /* ??? In many cases it's not neccessary to do a
+ /* ??? In many cases it's not necessary to do a
rotate, a shift is sufficient. */
if (shift != 0)
- tcg_gen_rori_i32(tmp, tmp, shift * 8);
+ tcg_gen_rotri_i32(tmp, tmp, shift * 8);
op1 = (insn >> 20) & 7;
switch (op1) {
case 0: gen_sxtb16(tmp); break;
@@ -6611,7 +7339,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
case 4: gen_uxtb16(tmp); break;
case 6: gen_uxtb(tmp); break;
case 7: gen_uxth(tmp); break;
- default: goto illegal_op;
+ default: tcg_temp_free_i32(tmp); goto illegal_op;
}
if (rn != 15) {
tmp2 = load_reg(s, rn);
@@ -6619,7 +7347,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
gen_add16(tmp, tmp2);
} else {
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
}
store_reg(s, rd, tmp);
@@ -6648,48 +7376,56 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tmp = load_reg(s, rm);
tmp2 = load_reg(s, rs);
if (insn & (1 << 20)) {
- /* Signed multiply most significant [accumulate]. */
+ /* Signed multiply most significant [accumulate].
+ (SMMUL, SMMLA, SMMLS) */
tmp64 = gen_muls_i64_i32(tmp, tmp2);
- if (insn & (1 << 5))
- tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
- tcg_gen_shri_i64(tmp64, tmp64, 32);
- tmp = new_tmp();
- tcg_gen_trunc_i64_i32(tmp, tmp64);
+
if (rd != 15) {
- tmp2 = load_reg(s, rd);
+ tmp = load_reg(s, rd);
if (insn & (1 << 6)) {
- tcg_gen_sub_i32(tmp, tmp, tmp2);
+ tmp64 = gen_subq_msw(tmp64, tmp);
} else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
+ tmp64 = gen_addq_msw(tmp64, tmp);
}
- dead_tmp(tmp2);
}
+ if (insn & (1 << 5)) {
+ tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
+ }
+ tcg_gen_shri_i64(tmp64, tmp64, 32);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_trunc_i64_i32(tmp, tmp64);
+ tcg_temp_free_i64(tmp64);
store_reg(s, rn, tmp);
} else {
if (insn & (1 << 5))
gen_swap_half(tmp2);
gen_smul_dual(tmp, tmp2);
- /* This addition cannot overflow. */
if (insn & (1 << 6)) {
+ /* This subtraction cannot overflow. */
tcg_gen_sub_i32(tmp, tmp, tmp2);
} else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
+ /* This addition cannot overflow 32 bits;
+ * however it may overflow considered as a signed
+ * operation, in which case we must set the Q flag.
+ */
+ gen_helper_add_setq(tmp, tmp, tmp2);
}
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (insn & (1 << 22)) {
/* smlald, smlsld */
tmp64 = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(tmp64, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
gen_addq(s, tmp64, rd, rn);
gen_storeq_reg(s, rd, rn, tmp64);
+ tcg_temp_free_i64(tmp64);
} else {
/* smuad, smusd, smlad, smlsd */
if (rd != 15)
{
tmp2 = load_reg(s, rd);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
store_reg(s, rn, tmp);
}
@@ -6703,11 +7439,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
tmp = load_reg(s, rm);
tmp2 = load_reg(s, rs);
gen_helper_usad8(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (rd != 15) {
tmp2 = load_reg(s, rd);
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
store_reg(s, rn, tmp);
break;
@@ -6718,7 +7454,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
i = (insn >> 16) & 0x1f;
i = i + 1 - shift;
if (rm == 15) {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
} else {
tmp = load_reg(s, rm);
@@ -6726,7 +7462,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if (i != 32) {
tmp2 = load_reg(s, rd);
gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
store_reg(s, rd, tmp);
break;
@@ -6792,14 +7528,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
} else if (insn & (1 << 21)) {
store_reg(s, rn, tmp2);
} else {
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
if (insn & (1 << 20)) {
/* Complete the load. */
- if (rd == 15)
- gen_bx(s, tmp);
- else
- store_reg(s, rd, tmp);
+ store_reg_from_load(env, s, rd, tmp);
}
break;
case 0x08:
@@ -6819,6 +7552,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
rn = (insn >> 16) & 0xf;
addr = load_reg(s, rn);
+ tmp3 = tcg_const_i32(4);
/* compute total size */
loaded_base = 0;
@@ -6832,7 +7566,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if (insn & (1 << 23)) {
if (insn & (1 << 24)) {
/* pre increment */
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp3);
} else {
/* post increment */
}
@@ -6852,27 +7586,29 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
if (insn & (1 << 20)) {
/* load */
tmp = gen_ld32(addr, IS_USER(s));
- if (i == 15) {
- gen_bx(s, tmp);
- } else if (user) {
- gen_helper_set_user_reg(tcg_const_i32(i), tmp);
- dead_tmp(tmp);
+ if (user) {
+ tmp2 = tcg_const_i32(i);
+ gen_helper_set_user_reg(tmp2, tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
} else if (i == rn) {
loaded_var = tmp;
loaded_base = 1;
} else {
- store_reg(s, i, tmp);
+ store_reg_from_load(env, s, i, tmp);
}
} else {
/* store */
if (i == 15) {
/* special case: r15 = PC + 8 */
val = (long)s->pc + 4;
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, val);
} else if (user) {
- tmp = new_tmp();
- gen_helper_get_user_reg(tmp, tcg_const_i32(i));
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_const_i32(i);
+ gen_helper_get_user_reg(tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
} else {
tmp = load_reg(s, i);
}
@@ -6881,7 +7617,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
j++;
/* no need to add after the last transfer */
if (j != n)
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp3);
}
}
if (insn & (1 << 21)) {
@@ -6891,7 +7627,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
/* pre increment */
} else {
/* post increment */
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp3);
}
} else {
if (insn & (1 << 24)) {
@@ -6905,8 +7641,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
}
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
+ tcg_temp_free_i32(tmp3);
if (loaded_base) {
store_reg(s, rn, loaded_var);
}
@@ -6914,7 +7651,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
/* Restore CPSR from SPSR. */
tmp = load_cpu_field(spsr);
gen_set_cpsr(tmp, 0xffffffff);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
s->is_jmp = DISAS_UPDATE;
}
}
@@ -6923,10 +7660,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
case 0xb:
{
int32_t offset;
+
/* branch (and link) */
val = (int32_t)s->pc;
if (insn & (1 << 24)) {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, val);
store_reg(s, 14, tmp);
}
@@ -6949,10 +7687,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
break;
default:
illegal_op:
- gen_set_condexec(s);
- gen_set_pc_im(s->pc - 4);
- gen_exception(EXCP_UDEF);
- s->is_jmp = DISAS_JUMP;
+ gen_exception_insn(s, 4, EXCP_UDEF);
break;
}
}
@@ -6972,70 +7707,69 @@ thumb2_logic_op(int op)
Returns zero if the opcode is valid. */
static int
-gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out)
+gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, TCGv t0, TCGv t1)
{
int logic_cc;
logic_cc = 0;
switch (op) {
case 0: /* and */
- gen_op_andl_T0_T1();
+ tcg_gen_and_i32(t0, t0, t1);
logic_cc = conds;
break;
case 1: /* bic */
- gen_op_bicl_T0_T1();
+ tcg_gen_andc_i32(t0, t0, t1);
logic_cc = conds;
break;
case 2: /* orr */
- gen_op_orl_T0_T1();
+ tcg_gen_or_i32(t0, t0, t1);
logic_cc = conds;
break;
case 3: /* orn */
- gen_op_notl_T1();
- gen_op_orl_T0_T1();
+ tcg_gen_orc_i32(t0, t0, t1);
logic_cc = conds;
break;
case 4: /* eor */
- gen_op_xorl_T0_T1();
+ tcg_gen_xor_i32(t0, t0, t1);
logic_cc = conds;
break;
case 8: /* add */
if (conds)
- gen_op_addl_T0_T1_cc();
+ gen_helper_add_cc(t0, t0, t1);
else
- gen_op_addl_T0_T1();
+ tcg_gen_add_i32(t0, t0, t1);
break;
case 10: /* adc */
if (conds)
- gen_op_adcl_T0_T1_cc();
+ gen_helper_adc_cc(t0, t0, t1);
else
- gen_adc_T0_T1();
+ gen_adc(t0, t1);
break;
case 11: /* sbc */
if (conds)
- gen_op_sbcl_T0_T1_cc();
+ gen_helper_sbc_cc(t0, t0, t1);
else
- gen_sbc_T0_T1();
+ gen_sub_carry(t0, t0, t1);
break;
case 13: /* sub */
if (conds)
- gen_op_subl_T0_T1_cc();
+ gen_helper_sub_cc(t0, t0, t1);
else
- gen_op_subl_T0_T1();
+ tcg_gen_sub_i32(t0, t0, t1);
break;
case 14: /* rsb */
if (conds)
- gen_op_rsbl_T0_T1_cc();
+ gen_helper_sub_cc(t0, t1, t0);
else
- gen_op_rsbl_T0_T1();
+ tcg_gen_sub_i32(t0, t1, t0);
break;
default: /* 5, 6, 7, 9, 12, 15. */
return 1;
}
if (logic_cc) {
- gen_op_logic_T0_cc();
+ gen_logic_CC(t0);
if (shifter_out)
- gen_set_CF_bit31(cpu_T[1]);
+ gen_set_CF_bit31(t1);
}
return 0;
}
@@ -7062,13 +7796,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
16-bit instructions to get correct prefetch abort behavior. */
insn = insn_hw1;
if ((insn & (1 << 12)) == 0) {
+ ARCH(5);
/* Second half of blx. */
offset = ((insn & 0x7ff) << 1);
tmp = load_reg(s, 14);
tcg_gen_addi_i32(tmp, tmp, offset);
tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, s->pc | 1);
store_reg(s, 14, tmp2);
gen_bx(s, tmp);
@@ -7080,7 +7815,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tmp = load_reg(s, 14);
tcg_gen_addi_i32(tmp, tmp, offset);
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, s->pc | 1);
store_reg(s, 14, tmp2);
gen_bx(s, tmp);
@@ -7091,8 +7826,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
 16-bit instructions in case the second half causes a
prefetch abort. */
offset = ((int32_t)insn << 21) >> 9;
- gen_op_movl_T0_im(s->pc + 2 + offset);
- gen_movl_reg_T0(s, 14);
+ tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
return 0;
}
/* Fall through to 32-bit decode. */
@@ -7100,10 +7834,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
insn = lduw_code(s->pc);
ANDROID_TRACE_START_THUMB();
-
- insn |= (uint32_t)insn_hw1 << 16;
-
s->pc += 2;
+ insn |= (uint32_t)insn_hw1 << 16;
if ((insn & 0xf800e800) != 0xf000e800) {
ARCH(6T2);
@@ -7123,7 +7855,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (insn & 0x01200000) {
/* Load/store doubleword. */
if (rn == 15) {
- addr = new_tmp();
+ addr = tcg_temp_new_i32();
tcg_gen_movi_i32(addr, s->pc & ~3);
} else {
addr = load_reg(s, rn);
@@ -7157,30 +7889,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tcg_gen_addi_i32(addr, addr, offset - 4);
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
} else if ((insn & (1 << 23)) == 0) {
/* Load/store exclusive word. */
- gen_movl_T1_reg(s, rn);
- addr = cpu_T[1];
+ addr = tcg_temp_local_new();
+ load_reg_var(s, addr, rn);
+ tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
if (insn & (1 << 20)) {
- gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
- tmp = gen_ld32(addr, IS_USER(s));
- store_reg(s, rd, tmp);
+ gen_load_exclusive(s, rs, 15, addr, 2);
} else {
- int label = gen_new_label();
- gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
- tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
- 0, label);
- tmp = load_reg(s, rs);
- gen_st32(tmp, cpu_T[1], IS_USER(s));
- gen_set_label(label);
- gen_movl_reg_T0(s, rd);
+ gen_store_exclusive(s, rd, rs, 15, addr, 2);
}
+ tcg_temp_free(addr);
} else if ((insn & (1 << 6)) == 0) {
/* Table Branch. */
if (rn == 15) {
- addr = new_tmp();
+ addr = tcg_temp_new_i32();
tcg_gen_movi_i32(addr, s->pc);
} else {
addr = load_reg(s, rn);
@@ -7190,70 +7915,31 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (insn & (1 << 4)) {
/* tbh */
tcg_gen_add_i32(addr, addr, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = gen_ld16u(addr, IS_USER(s));
} else { /* tbb */
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
tmp = gen_ld8u(addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
tcg_gen_shli_i32(tmp, tmp, 1);
tcg_gen_addi_i32(tmp, tmp, s->pc);
store_reg(s, 15, tmp);
} else {
/* Load/store exclusive byte/halfword/doubleword. */
- /* ??? These are not really atomic. However we know
- we never have multiple CPUs running in parallel,
- so it is good enough. */
+ ARCH(7);
op = (insn >> 4) & 0x3;
- /* Must use a global reg for the address because we have
- a conditional branch in the store instruction. */
- gen_movl_T1_reg(s, rn);
- addr = cpu_T[1];
+ if (op == 2) {
+ goto illegal_op;
+ }
+ addr = tcg_temp_local_new();
+ load_reg_var(s, addr, rn);
if (insn & (1 << 20)) {
- gen_helper_mark_exclusive(cpu_env, addr);
- switch (op) {
- case 0:
- tmp = gen_ld8u(addr, IS_USER(s));
- break;
- case 1:
- tmp = gen_ld16u(addr, IS_USER(s));
- break;
- case 3:
- tmp = gen_ld32(addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, 4);
- tmp2 = gen_ld32(addr, IS_USER(s));
- store_reg(s, rd, tmp2);
- break;
- default:
- goto illegal_op;
- }
- store_reg(s, rs, tmp);
+ gen_load_exclusive(s, rs, rd, addr, op);
} else {
- int label = gen_new_label();
- /* Must use a global that is not killed by the branch. */
- gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
- tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label);
- tmp = load_reg(s, rs);
- switch (op) {
- case 0:
- gen_st8(tmp, addr, IS_USER(s));
- break;
- case 1:
- gen_st16(tmp, addr, IS_USER(s));
- break;
- case 3:
- gen_st32(tmp, addr, IS_USER(s));
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = load_reg(s, rd);
- gen_st32(tmp, addr, IS_USER(s));
- break;
- default:
- goto illegal_op;
- }
- gen_set_label(label);
- gen_movl_reg_T0(s, rm);
+ gen_store_exclusive(s, rm, rs, rd, addr, op);
}
+ tcg_temp_free(addr);
}
} else {
/* Load/store multiple, RFE, SRS. */
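Annotation: earlier in the hunk above, the table-branch path (TBB/TBH) loads a byte or halfword offset from a table and doubles it before adding it to the PC. As a scalar sketch of the architectural semantics (illustrative names, not translator code):

    #include <stdint.h>

    /* base is Rn (or the PC for a literal table), index is Rm,
     * next_pc is the address of the instruction following the TBB/TBH. */
    static uint32_t tbb_tbh_target(const void *base, uint32_t index,
                                   uint32_t next_pc, int is_tbh)
    {
        uint32_t entry = is_tbh ? ((const uint16_t *)base)[index]
                                : ((const uint8_t *)base)[index];
        return next_pc + 2 * entry;     /* offsets are stored in halfwords */
    }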
@@ -7279,25 +7965,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
}
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
gen_rfe(s, tmp, tmp2);
} else {
/* srs */
op = (insn & 0x1f);
- if (op == (env->uncached_cpsr & CPSR_M)) {
- addr = load_reg(s, 13);
- } else {
- addr = new_tmp();
- gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op));
- }
+ addr = tcg_temp_new_i32();
+ tmp = tcg_const_i32(op);
+ gen_helper_get_r13_banked(addr, cpu_env, tmp);
+ tcg_temp_free_i32(tmp);
if ((insn & (1 << 24)) == 0) {
tcg_gen_addi_i32(addr, addr, -8);
}
tmp = load_reg(s, 14);
gen_st32(tmp, addr, 0);
tcg_gen_addi_i32(addr, addr, 4);
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
gen_helper_cpsr_read(tmp);
gen_st32(tmp, addr, 0);
if (insn & (1 << 21)) {
@@ -7306,18 +7990,16 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
} else {
tcg_gen_addi_i32(addr, addr, 4);
}
- if (op == (env->uncached_cpsr & CPSR_M)) {
- store_reg(s, 13, addr);
- } else {
- gen_helper_set_r13_banked(cpu_env,
- tcg_const_i32(op), addr);
- }
+ tmp = tcg_const_i32(op);
+ gen_helper_set_r13_banked(cpu_env, tmp, addr);
+ tcg_temp_free_i32(tmp);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
}
} else {
- int i;
+ int i, loaded_base = 0;
+ TCGv loaded_var;
/* Load/store multiple. */
addr = load_reg(s, rn);
offset = 0;
@@ -7329,6 +8011,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tcg_gen_addi_i32(addr, addr, -offset);
}
+ TCGV_UNUSED(loaded_var);
+ tmp2 = tcg_const_i32(4);
for (i = 0; i < 16; i++) {
if ((insn & (1 << i)) == 0)
continue;
@@ -7337,6 +8021,9 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tmp = gen_ld32(addr, IS_USER(s));
if (i == 15) {
gen_bx(s, tmp);
+ } else if (i == rn) {
+ loaded_var = tmp;
+ loaded_base = 1;
} else {
store_reg(s, i, tmp);
}
@@ -7345,8 +8032,12 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tmp = load_reg(s, i);
gen_st32(tmp, addr, IS_USER(s));
}
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp2);
}
+ if (loaded_base) {
+ store_reg(s, rn, loaded_var);
+ }
+ tcg_temp_free_i32(tmp2);
if (insn & (1 << 21)) {
/* Base register writeback. */
if (insn & (1 << 24)) {
@@ -7357,27 +8048,60 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
goto illegal_op;
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
}
}
break;
- case 5: /* Data processing register constant shift. */
- if (rn == 15)
- gen_op_movl_T0_im(0);
- else
- gen_movl_T0_reg(s, rn);
- gen_movl_T1_reg(s, rm);
+ case 5:
+
op = (insn >> 21) & 0xf;
- shiftop = (insn >> 4) & 3;
- shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
- conds = (insn & (1 << 20)) != 0;
- logic_cc = (conds && thumb2_logic_op(op));
- gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
- if (gen_thumb2_data_op(s, op, conds, 0))
- goto illegal_op;
- if (rd != 15)
- gen_movl_reg_T0(s, rd);
+ if (op == 6) {
+ /* Halfword pack. */
+ tmp = load_reg(s, rn);
+ tmp2 = load_reg(s, rm);
+ shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
+ if (insn & (1 << 5)) {
+ /* pkhtb */
+ if (shift == 0)
+ shift = 31;
+ tcg_gen_sari_i32(tmp2, tmp2, shift);
+ tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
+ tcg_gen_ext16u_i32(tmp2, tmp2);
+ } else {
+ /* pkhbt */
+ if (shift)
+ tcg_gen_shli_i32(tmp2, tmp2, shift);
+ tcg_gen_ext16u_i32(tmp, tmp);
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
+ }
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ store_reg(s, rd, tmp);
+ } else {
+ /* Data processing register constant shift. */
+ if (rn == 15) {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ } else {
+ tmp = load_reg(s, rn);
+ }
+ tmp2 = load_reg(s, rm);
+
+ shiftop = (insn >> 4) & 3;
+ shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
+ conds = (insn & (1 << 20)) != 0;
+ logic_cc = (conds && thumb2_logic_op(op));
+ gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
+ if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
+ goto illegal_op;
+ tcg_temp_free_i32(tmp2);
+ if (rd != 15) {
+ store_reg(s, rd, tmp);
+ } else {
+ tcg_temp_free_i32(tmp);
+ }
+ }
break;
case 13: /* Misc data processing. */
op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
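Annotation: the new halfword-pack path above implements PKHBT/PKHTB. A scalar restatement of what the shift/mask/or sequence computes (a sketch matching the TCG above):

    #include <stdint.h>

    static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned shift)
    {
        /* bottom half from Rn, top half from Rm shifted left */
        return (rn & 0xffffu) | ((rm << shift) & 0xffff0000u);
    }

    static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned shift)
    {
        /* top half from Rn, bottom half from Rm arithmetically shifted right;
         * an encoded shift of 0 means ASR #32, and ASR #31 gives the same
         * low 16 bits, which is the shortcut used in the code above */
        if (shift == 0) {
            shift = 31;
        }
        return (rn & 0xffff0000u) | (((uint32_t)((int32_t)rm >> shift)) & 0xffffu);
    }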
@@ -7399,10 +8123,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
case 1: /* Sign/zero extend. */
tmp = load_reg(s, rm);
shift = (insn >> 4) & 3;
- /* ??? In many cases it's not neccessary to do a
+ /* ??? In many cases it's not necessary to do a
rotate, a shift is sufficient. */
if (shift != 0)
- tcg_gen_rori_i32(tmp, tmp, shift * 8);
+ tcg_gen_rotri_i32(tmp, tmp, shift * 8);
op = (insn >> 20) & 7;
switch (op) {
case 0: gen_sxth(tmp); break;
@@ -7419,7 +8143,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
gen_add16(tmp, tmp2);
} else {
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
}
store_reg(s, rd, tmp);
@@ -7432,7 +8156,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
break;
case 3: /* Other data processing. */
@@ -7441,13 +8165,13 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
/* Saturating add/subtract. */
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
- if (op & 2)
- gen_helper_double_saturate(tmp, tmp);
if (op & 1)
+ gen_helper_double_saturate(tmp, tmp);
+ if (op & 2)
gen_helper_sub_saturate(tmp, tmp2, tmp);
else
gen_helper_add_saturate(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
} else {
tmp = load_reg(s, rn);
switch (op) {
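Annotation: the swapped tests above change which bit of op selects doubling and which selects subtraction for the QADD/QSUB/QDADD/QDSUB group. The helpers themselves behave roughly as follows (a sketch of their presumed semantics; the real versions live in op_helper.c and latch the sticky Q flag in CPUState):

    #include <stdint.h>

    /* 32-bit signed saturation, flagging whenever the result was clamped. */
    static int32_t ssat32(int64_t x, int *qflag)
    {
        if (x > INT32_MAX) { *qflag = 1; return INT32_MAX; }
        if (x < INT32_MIN) { *qflag = 1; return INT32_MIN; }
        return (int32_t)x;
    }

    static int32_t add_saturate(int32_t a, int32_t b, int *q)  /* QADD */
    {
        return ssat32((int64_t)a + b, q);
    }

    static int32_t sub_saturate(int32_t a, int32_t b, int *q)  /* QSUB */
    {
        return ssat32((int64_t)a - b, q);
    }

    static int32_t double_saturate(int32_t a, int *q)  /* doubling step of QDADD/QDSUB */
    {
        return ssat32(2 * (int64_t)a, q);
    }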
@@ -7465,11 +8189,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
break;
case 0x10: /* sel */
tmp2 = load_reg(s, rm);
- tmp3 = new_tmp();
+ tmp3 = tcg_temp_new_i32();
tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
- dead_tmp(tmp3);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp3);
+ tcg_temp_free_i32(tmp2);
break;
case 0x18: /* clz */
gen_helper_clz(tmp, tmp);
@@ -7487,23 +8211,23 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
switch ((insn >> 20) & 7) {
case 0: /* 32 x 32 -> 32 */
tcg_gen_mul_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (rs != 15) {
tmp2 = load_reg(s, rs);
if (op)
tcg_gen_sub_i32(tmp, tmp2, tmp);
else
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
case 1: /* 16 x 16 -> 32 */
gen_mulxy(tmp, tmp2, op & 2, op & 1);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (rs != 15) {
tmp2 = load_reg(s, rs);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
case 2: /* Dual multiply add. */
@@ -7511,18 +8235,22 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (op)
gen_swap_half(tmp2);
gen_smul_dual(tmp, tmp2);
- /* This addition cannot overflow. */
if (insn & (1 << 22)) {
+ /* This subtraction cannot overflow. */
tcg_gen_sub_i32(tmp, tmp, tmp2);
} else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
+ /* This addition cannot overflow 32 bits;
+ * however it may overflow considered as a signed
+ * operation, in which case we must set the Q flag.
+ */
+ gen_helper_add_setq(tmp, tmp, tmp2);
}
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (rs != 15)
{
tmp2 = load_reg(s, rs);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
case 3: /* 32 * 16 -> 32msb */
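Annotation: the dual multiply-accumulate hunk above replaces a plain add with gen_helper_add_setq because, as the new comment says, the accumulation can overflow when viewed as a signed operation and must then set Q. Presumed behavior of that helper, as a sketch:

    #include <stdint.h>

    /* 32-bit add that sets the sticky Q flag on signed overflow but does
     * not saturate the result. */
    static int32_t add_setq(int32_t a, int32_t b, int *qflag)
    {
        int32_t r = (int32_t)((uint32_t)a + (uint32_t)b);
        /* overflow iff a and b have the same sign and r has the other one */
        if (((a ^ r) & (b ^ r)) < 0) {
            *qflag = 1;
        }
        return r;
    }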
@@ -7532,41 +8260,41 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
gen_sxth(tmp2);
tmp64 = gen_muls_i64_i32(tmp, tmp2);
tcg_gen_shri_i64(tmp64, tmp64, 16);
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_trunc_i64_i32(tmp, tmp64);
+ tcg_temp_free_i64(tmp64);
if (rs != 15)
{
tmp2 = load_reg(s, rs);
gen_helper_add_setq(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
- case 5: case 6: /* 32 * 32 -> 32msb */
- gen_imull(tmp, tmp2);
- if (insn & (1 << 5)) {
- gen_roundqd(tmp, tmp2);
- dead_tmp(tmp2);
- } else {
- dead_tmp(tmp);
- tmp = tmp2;
- }
+ case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
+ tmp64 = gen_muls_i64_i32(tmp, tmp2);
if (rs != 15) {
- tmp2 = load_reg(s, rs);
- if (insn & (1 << 21)) {
- tcg_gen_add_i32(tmp, tmp, tmp2);
+ tmp = load_reg(s, rs);
+ if (insn & (1 << 20)) {
+ tmp64 = gen_addq_msw(tmp64, tmp);
} else {
- tcg_gen_sub_i32(tmp, tmp2, tmp);
+ tmp64 = gen_subq_msw(tmp64, tmp);
}
- dead_tmp(tmp2);
}
+ if (insn & (1 << 4)) {
+ tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
+ }
+ tcg_gen_shri_i64(tmp64, tmp64, 32);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_trunc_i64_i32(tmp, tmp64);
+ tcg_temp_free_i64(tmp64);
break;
case 7: /* Unsigned sum of absolute differences. */
gen_helper_usad8(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
if (rs != 15) {
tmp2 = load_reg(s, rs);
tcg_gen_add_i32(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
}
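Annotation: the rewritten case 5/6 above (SMMUL/SMMLA/SMMLS) keeps the whole 64-bit product and accumulates into the high word before truncating, instead of the old 32-bit gen_imull/gen_roundqd sequence. As arithmetic, the path computes roughly this (a sketch; the sub and round selectors correspond to the instruction bits tested above):

    #include <stdint.h>

    /* ra is the accumulator (0 for SMMUL), sub selects SMMLS, round selects
     * the rounding (R) variants.  Returns the value written to Rd. */
    static uint32_t smmxx(int32_t rn, int32_t rm, int32_t ra, int sub, int round)
    {
        uint64_t acc = (uint64_t)((int64_t)rn * (int64_t)rm);  /* full product */
        uint64_t hi  = (uint64_t)(uint32_t)ra << 32;           /* Ra in the top word */
        acc = sub ? hi - acc : hi + acc;
        if (round) {
            acc += 0x80000000u;                                /* round to nearest */
        }
        return (uint32_t)(acc >> 32);                          /* most significant word */
    }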
@@ -7584,7 +8312,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
gen_helper_udiv(tmp, tmp, tmp2);
else
gen_helper_sdiv(tmp, tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
store_reg(s, rd, tmp);
} else if ((op & 0xe) == 0xc) {
/* Dual multiply accumulate long. */
@@ -7596,13 +8324,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
} else {
tcg_gen_add_i32(tmp, tmp, tmp2);
}
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
/* BUGFIX */
tmp64 = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(tmp64, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
gen_addq(s, tmp64, rs, rd);
gen_storeq_reg(s, rs, rd, tmp64);
+ tcg_temp_free_i64(tmp64);
} else {
if (op & 0x20) {
/* Unsigned 64-bit multiply */
@@ -7611,10 +8340,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (op & 8) {
/* smlalxy */
gen_mulxy(tmp, tmp2, op & 2, op & 1);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
tmp64 = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(tmp64, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
} else {
/* Signed 64-bit multiply */
tmp64 = gen_muls_i64_i32(tmp, tmp2);
@@ -7629,6 +8358,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
gen_addq(s, tmp64, rs, rd);
}
gen_storeq_reg(s, rs, rd, tmp64);
+ tcg_temp_free_i64(tmp64);
}
break;
}
@@ -7637,7 +8367,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
/* Coprocessor. */
if (((insn >> 24) & 3) == 3) {
/* Translate into the equivalent ARM encoding. */
- insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
+ insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
if (disas_neon_data_insn(env, s, insn))
goto illegal_op;
} else {
@@ -7664,8 +8394,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (insn & (1 << 14)) {
/* Branch and link. */
- gen_op_movl_T1_im(s->pc | 1);
- gen_movl_reg_T1(s, 14);
+ tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
}
offset += s->pc;
@@ -7675,6 +8404,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
} else {
/* blx */
offset &= ~(uint32_t)2;
+ /* thumb2 bx, no need to check */
gen_bx_im(s, offset);
}
} else if (((insn >> 23) & 7) == 7) {
@@ -7683,8 +8413,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
goto illegal_op;
if (insn & (1 << 26)) {
- /* Secure monitor call (v6Z) */
- goto illegal_op; /* not implemented. */
+ /* Secure monitor call / smc (v6Z) */
+ if (!(env->cp15.c0_c2[4] & 0xf000) || IS_USER(s)) {
+ goto illegal_op;
+ }
+ gen_smc(env, s);
} else {
op = (insn >> 20) & 7;
switch (op) {
@@ -7693,6 +8426,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tmp = load_reg(s, rn);
addr = tcg_const_i32(insn & 0xff);
gen_helper_v7m_msr(cpu_env, addr, tmp);
+ tcg_temp_free_i32(addr);
+ tcg_temp_free_i32(tmp);
gen_lookup_tb(s);
break;
}
@@ -7700,10 +8435,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
case 1: /* msr spsr. */
if (IS_M(env))
goto illegal_op;
- gen_movl_T0_reg(s, rn);
- if (gen_set_psr_T0(s,
+ tmp = load_reg(s, rn);
+ if (gen_set_psr(s,
msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
- op == 1))
+ op == 1, tmp))
goto illegal_op;
break;
case 2: /* cps, nop-hint. */
@@ -7730,21 +8465,20 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
imm |= (insn & 0x1f);
}
if (offset) {
- gen_op_movl_T0_im(imm);
- gen_set_psr_T0(s, offset, 0);
+ gen_set_psr_im(s, offset, 0, imm);
}
break;
case 3: /* Special control operations. */
+ ARCH(7);
op = (insn >> 4) & 0xf;
switch (op) {
case 2: /* clrex */
- gen_helper_clrex(cpu_env);
+ gen_clrex(s);
break;
case 4: /* dsb */
case 5: /* dmb */
case 6: /* isb */
/* These execute as NOPs. */
- ARCH(7);
break;
default:
goto illegal_op;
@@ -7756,13 +8490,22 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
gen_bx(s, tmp);
break;
case 5: /* Exception return. */
- /* Unpredictable in user mode. */
- goto illegal_op;
+ if (IS_USER(s)) {
+ goto illegal_op;
+ }
+ if (rn != 14 || rd != 15) {
+ goto illegal_op;
+ }
+ tmp = load_reg(s, rn);
+ tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
+ gen_exception_return(s, tmp);
+ break;
case 6: /* mrs cpsr. */
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
if (IS_M(env)) {
addr = tcg_const_i32(insn & 0xff);
gen_helper_v7m_mrs(tmp, cpu_env, addr);
+ tcg_temp_free_i32(addr);
} else {
gen_helper_cpsr_read(tmp);
}
@@ -7810,7 +8553,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
imm = insn & 0x1f;
shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
if (rn == 15) {
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, 0);
} else {
tmp = load_reg(s, rn);
@@ -7837,7 +8580,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (imm != 32) {
tmp2 = load_reg(s, rd);
gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
- dead_tmp(tmp2);
+ tcg_temp_free_i32(tmp2);
}
break;
case 7:
@@ -7863,6 +8606,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
else
gen_helper_ssat(tmp, tmp, tmp2);
}
+ tcg_temp_free_i32(tmp2);
break;
}
store_reg(s, rd, tmp);
@@ -7879,7 +8623,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
tcg_gen_ori_i32(tmp, tmp, imm << 16);
} else {
/* movw */
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, imm);
}
} else {
@@ -7890,7 +8634,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
offset -= imm;
else
offset += imm;
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, offset);
} else {
tmp = load_reg(s, rn);
@@ -7929,19 +8673,25 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
shifter_out = 1;
break;
}
- gen_op_movl_T1_im(imm);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp2, imm);
rn = (insn >> 16) & 0xf;
- if (rn == 15)
- gen_op_movl_T0_im(0);
- else
- gen_movl_T0_reg(s, rn);
+ if (rn == 15) {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ } else {
+ tmp = load_reg(s, rn);
+ }
op = (insn >> 21) & 0xf;
if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
- shifter_out))
+ shifter_out, tmp, tmp2))
goto illegal_op;
+ tcg_temp_free_i32(tmp2);
rd = (insn >> 8) & 0xf;
if (rd != 15) {
- gen_movl_reg_T0(s, rd);
+ store_reg(s, rd, tmp);
+ } else {
+ tcg_temp_free_i32(tmp);
}
}
}
@@ -7956,9 +8706,45 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
goto illegal_op;
break;
}
+ op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
+ if (rs == 15) {
+ if (!(insn & (1 << 20))) {
+ goto illegal_op;
+ }
+ if (op != 2) {
+ /* Byte or halfword load space with dest == r15 : memory hints.
+ * Catch them early so we don't emit pointless addressing code.
+ * This space is a mix of:
+ * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
+ * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
+ * cores)
+ * unallocated hints, which must be treated as NOPs
+ * UNPREDICTABLE space, which we NOP or UNDEF depending on
+ * which is easiest for the decoding logic
+ * Some space which must UNDEF
+ */
+ int op1 = (insn >> 23) & 3;
+ int op2 = (insn >> 6) & 0x3f;
+ if (op & 2) {
+ goto illegal_op;
+ }
+ if (rn == 15) {
+ /* UNPREDICTABLE or unallocated hint */
+ return 0;
+ }
+ if (op1 & 1) {
+ return 0; /* PLD* or unallocated hint */
+ }
+ if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
+ return 0; /* PLD* or unallocated hint */
+ }
+ /* UNDEF space, or an UNPREDICTABLE */
+ return 1;
+ }
+ }
user = IS_USER(s);
if (rn == 15) {
- addr = new_tmp();
+ addr = tcg_temp_new_i32();
/* PC relative. */
/* s->pc has already been incremented by 4. */
imm = s->pc & 0xfffffffc;
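Annotation: the block added above filters the Rt==15 byte/halfword load space (memory hints) before any address arithmetic is generated. Restated as a stand-alone decision function (a sketch; the translator itself signals UNDEF by returning 1 from disas_thumb2_insn rather than through a separate enum):

    enum { HINT_UNDEF = -1, HINT_NOP = 0, HINT_REAL_LOAD = 1 };

    /* op is the size/sign field, rn the base register, op1 = insn[24:23],
     * op2 = insn[11:6], exactly as extracted in the code above. */
    static int classify_rt15_load(int op, int rn, int op1, int op2)
    {
        if (op == 2) {
            return HINT_REAL_LOAD;          /* word loads fall through as real loads */
        }
        if (op & 2) {
            return HINT_UNDEF;              /* signed byte/half space: UNDEF */
        }
        if (rn == 15) {
            return HINT_NOP;                /* UNPREDICTABLE or unallocated hint */
        }
        if (op1 & 1) {
            return HINT_NOP;                /* PLD/PLDW/PLI or unallocated hint */
        }
        if (op2 == 0 || (op2 & 0x3c) == 0x30) {
            return HINT_NOP;                /* more hint space */
        }
        return HINT_UNDEF;                  /* remaining encodings UNDEF */
    }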
@@ -7974,77 +8760,74 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
imm = insn & 0xfff;
tcg_gen_addi_i32(addr, addr, imm);
} else {
- op = (insn >> 8) & 7;
imm = insn & 0xff;
- switch (op) {
- case 0: case 8: /* Shifted Register. */
+ switch ((insn >> 8) & 0xf) {
+ case 0x0: /* Shifted Register. */
shift = (insn >> 4) & 0xf;
- if (shift > 3)
+ if (shift > 3) {
+ tcg_temp_free_i32(addr);
goto illegal_op;
+ }
tmp = load_reg(s, rm);
if (shift)
tcg_gen_shli_i32(tmp, tmp, shift);
tcg_gen_add_i32(addr, addr, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
break;
- case 4: /* Negative offset. */
+ case 0xc: /* Negative offset. */
tcg_gen_addi_i32(addr, addr, -imm);
break;
- case 6: /* User privilege. */
+ case 0xe: /* User privilege. */
tcg_gen_addi_i32(addr, addr, imm);
user = 1;
break;
- case 1: /* Post-decrement. */
+ case 0x9: /* Post-decrement. */
imm = -imm;
/* Fall through. */
- case 3: /* Post-increment. */
+ case 0xb: /* Post-increment. */
postinc = 1;
writeback = 1;
break;
- case 5: /* Pre-decrement. */
+ case 0xd: /* Pre-decrement. */
imm = -imm;
/* Fall through. */
- case 7: /* Pre-increment. */
+ case 0xf: /* Pre-increment. */
tcg_gen_addi_i32(addr, addr, imm);
writeback = 1;
break;
default:
+ tcg_temp_free_i32(addr);
goto illegal_op;
}
}
}
- op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
if (insn & (1 << 20)) {
/* Load. */
- if (rs == 15 && op != 2) {
- if (op & 2)
- goto illegal_op;
- /* Memory hint. Implemented as NOP. */
+ switch (op) {
+ case 0: tmp = gen_ld8u(addr, user); break;
+ case 4: tmp = gen_ld8s(addr, user); break;
+ case 1: tmp = gen_ld16u(addr, user); break;
+ case 5: tmp = gen_ld16s(addr, user); break;
+ case 2: tmp = gen_ld32(addr, user); break;
+ default:
+ tcg_temp_free_i32(addr);
+ goto illegal_op;
+ }
+ if (rs == 15) {
+ gen_bx(s, tmp);
} else {
- switch (op) {
- case 0: tmp = gen_ld8u(addr, user); break;
- case 4: tmp = gen_ld8s(addr, user); break;
- case 1: tmp = gen_ld16u(addr, user); break;
- case 5: tmp = gen_ld16s(addr, user); break;
- case 2: tmp = gen_ld32(addr, user); break;
- default: goto illegal_op;
- }
- if (rs == 15) {
- gen_bx(s, tmp);
- } else {
- store_reg(s, rs, tmp);
- }
+ store_reg(s, rs, tmp);
}
} else {
/* Store. */
- if (rs == 15)
- goto illegal_op;
tmp = load_reg(s, rs);
switch (op) {
case 0: gen_st8(tmp, addr, user); break;
case 1: gen_st16(tmp, addr, user); break;
case 2: gen_st32(tmp, addr, user); break;
- default: goto illegal_op;
+ default:
+ tcg_temp_free_i32(addr);
+ goto illegal_op;
}
}
if (postinc)
@@ -8052,7 +8835,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
if (writeback) {
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
}
}
break;
@@ -8075,9 +8858,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
if (s->condexec_mask) {
cond = s->condexec_cond;
- s->condlabel = gen_new_label();
- gen_test_cc(cond ^ 1, s->condlabel);
- s->condjmp = 1;
+ if (cond != 0x0e) { /* Skip conditional when condition is AL. */
+ s->condlabel = gen_new_label();
+ gen_test_cc(cond ^ 1, s->condlabel);
+ s->condjmp = 1;
+ }
}
insn = lduw_code(s->pc);
@@ -8090,32 +8875,35 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
switch (insn >> 12) {
case 0: case 1:
+
rd = insn & 7;
op = (insn >> 11) & 3;
if (op == 3) {
/* add/subtract */
rn = (insn >> 3) & 7;
- gen_movl_T0_reg(s, rn);
+ tmp = load_reg(s, rn);
if (insn & (1 << 10)) {
/* immediate */
- gen_op_movl_T1_im((insn >> 6) & 7);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
} else {
/* reg */
rm = (insn >> 6) & 7;
- gen_movl_T1_reg(s, rm);
+ tmp2 = load_reg(s, rm);
}
if (insn & (1 << 9)) {
if (s->condexec_mask)
- gen_op_subl_T0_T1();
+ tcg_gen_sub_i32(tmp, tmp, tmp2);
else
- gen_op_subl_T0_T1_cc();
+ gen_helper_sub_cc(tmp, tmp, tmp2);
} else {
if (s->condexec_mask)
- gen_op_addl_T0_T1();
+ tcg_gen_add_i32(tmp, tmp, tmp2);
else
- gen_op_addl_T0_T1_cc();
+ gen_helper_add_cc(tmp, tmp, tmp2);
}
- gen_movl_reg_T0(s, rd);
+ tcg_temp_free_i32(tmp2);
+ store_reg(s, rd, tmp);
} else {
/* shift immediate */
rm = (insn >> 3) & 7;
@@ -8131,35 +8919,40 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
/* arithmetic large immediate */
op = (insn >> 11) & 3;
rd = (insn >> 8) & 0x7;
- if (op == 0) {
- gen_op_movl_T0_im(insn & 0xff);
- } else {
- gen_movl_T0_reg(s, rd);
- gen_op_movl_T1_im(insn & 0xff);
- }
- switch (op) {
- case 0: /* mov */
+ if (op == 0) { /* mov */
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, insn & 0xff);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
- break;
- case 1: /* cmp */
- gen_op_subl_T0_T1_cc();
- break;
- case 2: /* add */
- if (s->condexec_mask)
- gen_op_addl_T0_T1();
- else
- gen_op_addl_T0_T1_cc();
- break;
- case 3: /* sub */
- if (s->condexec_mask)
- gen_op_subl_T0_T1();
- else
- gen_op_subl_T0_T1_cc();
- break;
+ gen_logic_CC(tmp);
+ store_reg(s, rd, tmp);
+ } else {
+ tmp = load_reg(s, rd);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp2, insn & 0xff);
+ switch (op) {
+ case 1: /* cmp */
+ gen_helper_sub_cc(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
+ case 2: /* add */
+ if (s->condexec_mask)
+ tcg_gen_add_i32(tmp, tmp, tmp2);
+ else
+ gen_helper_add_cc(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ store_reg(s, rd, tmp);
+ break;
+ case 3: /* sub */
+ if (s->condexec_mask)
+ tcg_gen_sub_i32(tmp, tmp, tmp2);
+ else
+ gen_helper_sub_cc(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ store_reg(s, rd, tmp);
+ break;
+ }
}
- if (op != 1)
- gen_movl_reg_T0(s, rd);
break;
case 4:
if (insn & (1 << 11)) {
@@ -8167,10 +8960,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
/* load pc-relative. Bit 1 of PC is ignored. */
val = s->pc + 2 + ((insn & 0xff) * 4);
val &= ~(uint32_t)2;
- addr = new_tmp();
+ addr = tcg_temp_new_i32();
tcg_gen_movi_i32(addr, val);
tmp = gen_ld32(addr, IS_USER(s));
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
store_reg(s, rd, tmp);
break;
}
@@ -8181,28 +8974,33 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
op = (insn >> 8) & 3;
switch (op) {
case 0: /* add */
- gen_movl_T0_reg(s, rd);
- gen_movl_T1_reg(s, rm);
- gen_op_addl_T0_T1();
- gen_movl_reg_T0(s, rd);
+ tmp = load_reg(s, rd);
+ tmp2 = load_reg(s, rm);
+ tcg_gen_add_i32(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ store_reg(s, rd, tmp);
break;
case 1: /* cmp */
- gen_movl_T0_reg(s, rd);
- gen_movl_T1_reg(s, rm);
- gen_op_subl_T0_T1_cc();
+ tmp = load_reg(s, rd);
+ tmp2 = load_reg(s, rm);
+ gen_helper_sub_cc(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp);
break;
case 2: /* mov/cpy */
- gen_movl_T0_reg(s, rm);
- gen_movl_reg_T0(s, rd);
+ tmp = load_reg(s, rm);
+ store_reg(s, rd, tmp);
break;
case 3:/* branch [and link] exchange thumb register */
tmp = load_reg(s, rm);
if (insn & (1 << 7)) {
+ ARCH(5);
val = (uint32_t)s->pc | 1;
- tmp2 = new_tmp();
+ tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, val);
store_reg(s, 14, tmp2);
}
+ /* already thumb, no need to check */
gen_bx(s, tmp);
break;
}
@@ -8223,114 +9021,126 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
val = 0;
}
- if (op == 9) /* neg */
- gen_op_movl_T0_im(0);
- else if (op != 0xf) /* mvn doesn't read its first operand */
- gen_movl_T0_reg(s, rd);
+ if (op == 9) { /* neg */
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ } else if (op != 0xf) { /* mvn doesn't read its first operand */
+ tmp = load_reg(s, rd);
+ } else {
+ TCGV_UNUSED(tmp);
+ }
- gen_movl_T1_reg(s, rm);
+ tmp2 = load_reg(s, rm);
switch (op) {
case 0x0: /* and */
- gen_op_andl_T0_T1();
+ tcg_gen_and_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
+ gen_logic_CC(tmp);
break;
case 0x1: /* eor */
- gen_op_xorl_T0_T1();
+ tcg_gen_xor_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
+ gen_logic_CC(tmp);
break;
case 0x2: /* lsl */
if (s->condexec_mask) {
- gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]);
+ gen_helper_shl(tmp2, tmp2, tmp);
} else {
- gen_helper_shl_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
- gen_op_logic_T1_cc();
+ gen_helper_shl_cc(tmp2, tmp2, tmp);
+ gen_logic_CC(tmp2);
}
break;
case 0x3: /* lsr */
if (s->condexec_mask) {
- gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]);
+ gen_helper_shr(tmp2, tmp2, tmp);
} else {
- gen_helper_shr_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
- gen_op_logic_T1_cc();
+ gen_helper_shr_cc(tmp2, tmp2, tmp);
+ gen_logic_CC(tmp2);
}
break;
case 0x4: /* asr */
if (s->condexec_mask) {
- gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]);
+ gen_helper_sar(tmp2, tmp2, tmp);
} else {
- gen_helper_sar_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
- gen_op_logic_T1_cc();
+ gen_helper_sar_cc(tmp2, tmp2, tmp);
+ gen_logic_CC(tmp2);
}
break;
case 0x5: /* adc */
if (s->condexec_mask)
- gen_adc_T0_T1();
+ gen_adc(tmp, tmp2);
else
- gen_op_adcl_T0_T1_cc();
+ gen_helper_adc_cc(tmp, tmp, tmp2);
break;
case 0x6: /* sbc */
if (s->condexec_mask)
- gen_sbc_T0_T1();
+ gen_sub_carry(tmp, tmp, tmp2);
else
- gen_op_sbcl_T0_T1_cc();
+ gen_helper_sbc_cc(tmp, tmp, tmp2);
break;
case 0x7: /* ror */
if (s->condexec_mask) {
- gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]);
+ tcg_gen_andi_i32(tmp, tmp, 0x1f);
+ tcg_gen_rotr_i32(tmp2, tmp2, tmp);
} else {
- gen_helper_ror_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
- gen_op_logic_T1_cc();
+ gen_helper_ror_cc(tmp2, tmp2, tmp);
+ gen_logic_CC(tmp2);
}
break;
case 0x8: /* tst */
- gen_op_andl_T0_T1();
- gen_op_logic_T0_cc();
+ tcg_gen_and_i32(tmp, tmp, tmp2);
+ gen_logic_CC(tmp);
rd = 16;
break;
case 0x9: /* neg */
if (s->condexec_mask)
- tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
+ tcg_gen_neg_i32(tmp, tmp2);
else
- gen_op_subl_T0_T1_cc();
+ gen_helper_sub_cc(tmp, tmp, tmp2);
break;
case 0xa: /* cmp */
- gen_op_subl_T0_T1_cc();
+ gen_helper_sub_cc(tmp, tmp, tmp2);
rd = 16;
break;
case 0xb: /* cmn */
- gen_op_addl_T0_T1_cc();
+ gen_helper_add_cc(tmp, tmp, tmp2);
rd = 16;
break;
case 0xc: /* orr */
- gen_op_orl_T0_T1();
+ tcg_gen_or_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
+ gen_logic_CC(tmp);
break;
case 0xd: /* mul */
- gen_op_mull_T0_T1();
+ tcg_gen_mul_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
+ gen_logic_CC(tmp);
break;
case 0xe: /* bic */
- gen_op_bicl_T0_T1();
+ tcg_gen_andc_i32(tmp, tmp, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T0_cc();
+ gen_logic_CC(tmp);
break;
case 0xf: /* mvn */
- gen_op_notl_T1();
+ tcg_gen_not_i32(tmp2, tmp2);
if (!s->condexec_mask)
- gen_op_logic_T1_cc();
+ gen_logic_CC(tmp2);
val = 1;
rm = rd;
break;
}
if (rd != 16) {
- if (val)
- gen_movl_reg_T1(s, rm);
- else
- gen_movl_reg_T0(s, rd);
+ if (val) {
+ store_reg(s, rm, tmp2);
+ if (op != 0xf)
+ tcg_temp_free_i32(tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ tcg_temp_free_i32(tmp2);
+ }
+ } else {
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
}
break;
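Annotation: throughout the rewritten Thumb ALU case above, the flag-setting variants (gen_helper_*_cc, gen_logic_CC) are used only when s->condexec_mask is zero, i.e. outside an IT block, and the ROR path now masks the shift amount itself instead of calling a helper. A scalar sketch of that register-specified rotate, matching the non-flag-setting TCG above:

    #include <stdint.h>

    static uint32_t ror_by_register(uint32_t value, uint32_t rs)
    {
        uint32_t shift = rs & 0x1f;        /* only the low five bits take part */
        if (shift == 0) {
            return value;                  /* rotate by 0 (or 32) leaves the value */
        }
        return (value >> shift) | (value << (32 - shift));
    }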
@@ -8343,7 +9153,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
addr = load_reg(s, rn);
tmp = load_reg(s, rm);
tcg_gen_add_i32(addr, addr, tmp);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
if (op < 3) /* store */
tmp = load_reg(s, rd);
@@ -8376,7 +9186,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
}
if (op >= 3) /* load */
store_reg(s, rd, tmp);
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
break;
case 6:
@@ -8396,7 +9206,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, rd);
gen_st32(tmp, addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
break;
case 7:
@@ -8416,7 +9226,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, rd);
gen_st8(tmp, addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
break;
case 8:
@@ -8436,7 +9246,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, rd);
gen_st16(tmp, addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
break;
case 9:
@@ -8455,7 +9265,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, rd);
gen_st32(tmp, addr, IS_USER(s));
}
- dead_tmp(addr);
+ tcg_temp_free_i32(addr);
break;
case 10:
@@ -8466,7 +9276,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, 13);
} else {
/* PC. bit 1 is ignored. */
- tmp = new_tmp();
+ tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
}
val = (insn & 0xff) * 4;
@@ -8515,6 +9325,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
if ((insn & (1 << 11)) == 0) {
tcg_gen_addi_i32(addr, addr, -offset);
}
+ tmp2 = tcg_const_i32(4);
for (i = 0; i < 8; i++) {
if (insn & (1 << i)) {
if (insn & (1 << 11)) {
@@ -8527,7 +9338,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
gen_st32(tmp, addr, IS_USER(s));
}
/* advance to the next address. */
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp2);
}
}
TCGV_UNUSED(tmp);
@@ -8542,16 +9353,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tmp = load_reg(s, 14);
gen_st32(tmp, addr, IS_USER(s));
}
- tcg_gen_addi_i32(addr, addr, 4);
+ tcg_gen_add_i32(addr, addr, tmp2);
}
+ tcg_temp_free_i32(tmp2);
if ((insn & (1 << 11)) == 0) {
tcg_gen_addi_i32(addr, addr, -offset);
}
/* write back the new stack pointer */
store_reg(s, 13, addr);
/* set the new PC value */
- if ((insn & 0x0900) == 0x0900)
- gen_bx(s, tmp);
+ if ((insn & 0x0900) == 0x0900) {
+ store_reg_from_load(env, s, 15, tmp);
+ }
break;
case 1: case 3: case 9: case 11: /* cbz/cbnz */
@@ -8563,7 +9376,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
else
tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
- dead_tmp(tmp);
+ tcg_temp_free_i32(tmp);
offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
val = (uint32_t)s->pc + 2;
val += offset;
@@ -8582,10 +9395,8 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
break;
case 0xe: /* bkpt */
- gen_set_condexec(s);
- gen_set_pc_im(s->pc - 2);
- gen_exception(EXCP_BKPT);
- s->is_jmp = DISAS_JUMP;
+ ARCH(5);
+ gen_exception_insn(s, 2, EXCP_BKPT);
break;
case 0xa: /* rev */
@@ -8612,22 +9423,22 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
if (insn & 1) {
addr = tcg_const_i32(16);
gen_helper_v7m_msr(cpu_env, addr, tmp);
+ tcg_temp_free_i32(addr);
}
/* FAULTMASK */
if (insn & 2) {
addr = tcg_const_i32(17);
gen_helper_v7m_msr(cpu_env, addr, tmp);
+ tcg_temp_free_i32(addr);
}
+ tcg_temp_free_i32(tmp);
gen_lookup_tb(s);
} else {
if (insn & (1 << 4))
shift = CPSR_A | CPSR_I | CPSR_F;
else
shift = 0;
-
- val = ((insn & 7) << 6) & shift;
- gen_op_movl_T0_im(val);
- gen_set_psr_T0(s, shift, 0);
+ gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
}
break;
@@ -8637,7 +9448,10 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
break;
case 12:
+ {
/* load/store multiple */
+ TCGv loaded_var;
+ TCGV_UNUSED(loaded_var);
rn = (insn >> 8) & 0x7;
addr = load_reg(s, rn);
for (i = 0; i < 8; i++) {
@@ -8645,7 +9459,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
if (insn & (1 << 11)) {
/* load */
tmp = gen_ld32(addr, IS_USER(s));
- store_reg(s, i, tmp);
+ if (i == rn) {
+ loaded_var = tmp;
+ } else {
+ store_reg(s, i, tmp);
+ }
} else {
/* store */
tmp = load_reg(s, i);
@@ -8655,14 +9473,18 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
tcg_gen_addi_i32(addr, addr, 4);
}
}
- /* Base register writeback. */
if ((insn & (1 << rn)) == 0) {
+ /* base reg not in list: base register writeback */
store_reg(s, rn, addr);
} else {
- dead_tmp(addr);
+ /* base reg in list: if load, complete it now */
+ if (insn & (1 << 11)) {
+ store_reg(s, rn, loaded_var);
+ }
+ tcg_temp_free_i32(addr);
}
break;
-
+ }
case 13:
/* conditional branch or swi */
cond = (insn >> 8) & 0xf;
@@ -8671,7 +9493,6 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
if (cond == 0xf) {
/* swi */
- gen_set_condexec(s);
gen_set_pc_im(s->pc);
s->is_jmp = DISAS_SWI;
break;
@@ -8680,7 +9501,6 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
s->condlabel = gen_new_label();
gen_test_cc(cond ^ 1, s->condlabel);
s->condjmp = 1;
- gen_movl_T1_reg(s, 15);
/* jump to the offset */
val = (uint32_t)s->pc + 2;
@@ -8709,17 +9529,11 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
}
return;
undef32:
- gen_set_condexec(s);
- gen_set_pc_im(s->pc - 4);
- gen_exception(EXCP_UDEF);
- s->is_jmp = DISAS_JUMP;
+ gen_exception_insn(s, 4, EXCP_UDEF);
return;
illegal_op:
undef:
- gen_set_condexec(s);
- gen_set_pc_im(s->pc - 2);
- gen_exception(EXCP_UDEF);
- s->is_jmp = DISAS_JUMP;
+ gen_exception_insn(s, 2, EXCP_UDEF);
}
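Annotation: the undef/bkpt paths above now funnel through gen_exception_insn(). Judging from the code it replaces, the helper is essentially the removed four-line pattern with the PC rewind parameterised; a sketch using this file's own helpers (the real definition appears earlier in translate.c and may differ in detail):

    /* offset is the size of the faulting instruction (2 or 4 bytes), so the
     * TB is left with the PC pointing back at that instruction. */
    static void gen_exception_insn_sketch(DisasContext *s, int offset, int excp)
    {
        gen_set_condexec(s);           /* sync the IT bits before leaving the TB */
        gen_set_pc_im(s->pc - offset);
        gen_exception(excp);
        s->is_jmp = DISAS_JUMP;
    }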
/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -8739,9 +9553,6 @@ static inline void gen_intermediate_code_internal(CPUState *env,
int max_insns;
/* generate intermediate code */
- num_temps = 0;
- memset(temps, 0, sizeof(temps));
-
pc_start = tb->pc;
dc->tb = tb;
@@ -8752,18 +9563,16 @@ static inline void gen_intermediate_code_internal(CPUState *env,
dc->pc = pc_start;
dc->singlestep_enabled = env->singlestep_enabled;
dc->condjmp = 0;
- dc->thumb = env->thumb;
- dc->condexec_mask = (env->condexec_bits & 0xf) << 1;
- dc->condexec_mask_prev = dc->condexec_mask;
- dc->condexec_cond = env->condexec_bits >> 4;
+ dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
+ dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
+ dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
#if !defined(CONFIG_USER_ONLY)
- if (IS_M(env)) {
- dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1));
- } else {
- dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
- }
+ dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
#endif
ANDROID_START_CODEGEN(search_pc);
+ dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
+ dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
+ dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
cpu_F0s = tcg_temp_new_i32();
cpu_F1s = tcg_temp_new_i32();
cpu_F0d = tcg_temp_new_i64();
@@ -8782,6 +9591,47 @@ static inline void gen_intermediate_code_internal(CPUState *env,
gen_icount_start();
ANDROID_TRACE_START_BB();
+ tcg_clear_temp_count();
+
+ /* A note on handling of the condexec (IT) bits:
+ *
+ * We want to avoid the overhead of having to write the updated condexec
+ * bits back to the CPUState for every instruction in an IT block. So:
+ * (1) if the condexec bits are not already zero then we write
+ * zero back into the CPUState now. This avoids complications trying
+ * to do it at the end of the block. (For example if we don't do this
+ * it's hard to identify whether we can safely skip writing condexec
+ * at the end of the TB, which we definitely want to do for the case
+ * where a TB doesn't do anything with the IT state at all.)
+ * (2) if we are going to leave the TB then we call gen_set_condexec()
+ * which will write the correct value into CPUState if zero is wrong.
+ * This is done both for leaving the TB at the end, and for leaving
+ * it because of an exception we know will happen, which is done in
+ * gen_exception_insn(). The latter is necessary because we need to
+ * leave the TB with the PC/IT state just prior to execution of the
+ * instruction which caused the exception.
+ * (3) if we leave the TB unexpectedly (eg a data abort on a load)
+ * then the CPUState will be wrong and we need to reset it.
+ * This is handled in the same way as restoration of the
+ * PC in these situations: we will be called again with search_pc=1
+ * and generate a mapping of the condexec bits for each PC in
+ * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
+ * this to restore the condexec bits.
+ *
+ * Note that there are no instructions which can read the condexec
+ * bits, and none which can write non-static values to them, so
+ * we don't need to care about whether CPUState is correct in the
+ * middle of a TB.
+ */
+
+ /* Reset the conditional execution bits immediately. This avoids
+ complications trying to do it at the end of the block. */
+ if (dc->condexec_mask || dc->condexec_cond)
+ {
+ TCGv tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ store_cpu_field(tmp, condexec_bits);
+ }
do {
#ifdef CONFIG_USER_ONLY
/* Intercept jump to the magic kernel page. */
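Annotation: the long note in the hunk above explains when the condexec (IT) bits are written back to CPUState. For reference, the per-instruction updates to condexec_cond/condexec_mask correspond to the architectural ITAdvance() rule, sketched here on the packed 8-bit ITSTATE (the translator keeps the same information split across two fields rather than one byte):

    /* ITSTATE: base condition in bits [7:5]; the low five bits shift left
     * after every instruction in the block until they are exhausted. */
    static unsigned it_advance(unsigned itstate)
    {
        if ((itstate & 0x7) == 0) {
            return 0;                                  /* IT block finished */
        }
        return (itstate & 0xe0) | ((itstate << 1) & 0x1f);
    }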
@@ -8805,10 +9655,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
if (bp->pc == dc->pc) {
- gen_set_condexec(dc);
- gen_set_pc_im(dc->pc);
- gen_exception(EXCP_DEBUG);
- dc->is_jmp = DISAS_JUMP;
+ gen_exception_insn(dc, 0, EXCP_DEBUG);
/* Advance PC so that clearing the breakpoint will
invalidate this TB. */
dc->pc += 2;
@@ -8826,6 +9673,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
gen_opc_instr_start[lj++] = 0;
}
gen_opc_pc[lj] = dc->pc;
+ gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
gen_opc_instr_start[lj] = 1;
gen_opc_icount[lj] = num_insns;
}
@@ -8833,9 +9681,12 @@ static inline void gen_intermediate_code_internal(CPUState *env,
if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
gen_io_start();
- if (env->thumb) {
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+ tcg_gen_debug_insn_start(dc->pc);
+ }
+
+ if (dc->thumb) {
disas_thumb_insn(env, dc);
- dc->condexec_mask_prev = dc->condexec_mask;
if (dc->condexec_mask) {
dc->condexec_cond = (dc->condexec_cond & 0xe)
| ((dc->condexec_mask >> 4) & 1);
@@ -8847,16 +9698,16 @@ static inline void gen_intermediate_code_internal(CPUState *env,
} else {
disas_arm_insn(env, dc);
}
- if (num_temps) {
- fprintf(stderr, "Internal resource leak before %08x (%d temps)\n", dc->pc, num_temps);
- tcg_dump_ops(&tcg_ctx, stderr);
- num_temps = 0;
- }
if (dc->condjmp && !dc->is_jmp) {
gen_set_label(dc->condlabel);
dc->condjmp = 0;
}
+
+ if (tcg_check_temp_count()) {
+ fprintf(stderr, "TCG temporary leak before %08x\n", dc->pc);
+ }
+
/* Translation stops when a conditional branch is encountered.
* Otherwise the subsequent code could get translated several times.
* Also stop translation when a page boundary is reached. This
@@ -8888,6 +9739,8 @@ static inline void gen_intermediate_code_internal(CPUState *env,
gen_set_condexec(dc);
if (dc->is_jmp == DISAS_SWI) {
gen_exception(EXCP_SWI);
+ } else if (dc->is_jmp == DISAS_SMC) {
+ gen_exception(EXCP_SMC);
} else {
gen_exception(EXCP_DEBUG);
}
@@ -8900,6 +9753,8 @@ static inline void gen_intermediate_code_internal(CPUState *env,
gen_set_condexec(dc);
if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
gen_exception(EXCP_SWI);
+ } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) {
+ gen_exception(EXCP_SMC);
} else {
/* FIXME: Single stepping a WFI insn will not halt
the CPU. */
@@ -8934,6 +9789,9 @@ static inline void gen_intermediate_code_internal(CPUState *env,
case DISAS_SWI:
gen_exception(EXCP_SWI);
break;
+ case DISAS_SMC:
+ gen_exception(EXCP_SMC);
+ break;
}
if (dc->condjmp) {
gen_set_label(dc->condlabel);
@@ -8951,7 +9809,7 @@ done_generating:
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
- log_target_disas(pc_start, dc->pc - pc_start, env->thumb);
+ log_target_disas(pc_start, dc->pc - pc_start, dc->thumb);
qemu_log("\n");
}
#endif
@@ -8982,8 +9840,7 @@ static const char *cpu_mode_names[16] = {
"???", "???", "???", "und", "???", "???", "???", "sys"
};
-void cpu_dump_state(CPUState *env, FILE *f,
- int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
int flags)
{
int i;
@@ -9038,4 +9895,5 @@ void cpu_dump_state(CPUState *env, FILE *f,
void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)
{
env->regs[15] = gen_opc_pc[pc_pos];
+ env->condexec_bits = gen_opc_condexec_bits[pc_pos];
}
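Annotation: the new gen_opc_condexec_bits[] entry and the extra line in restore_state_to_opc() form a save/restore pair for the IT state across a mid-TB fault. The packing mirrors the unpacking done when translation of a TB starts, as a sketch:

    /* Written per instruction into gen_opc_condexec_bits[] (see above). */
    static unsigned pack_condexec(unsigned condexec_cond, unsigned condexec_mask)
    {
        return (condexec_cond << 4) | (condexec_mask >> 1);
    }

    /* Inverse of the unpacking at the top of gen_intermediate_code_internal(). */
    static void unpack_condexec(unsigned condexec_bits,
                                unsigned *condexec_cond, unsigned *condexec_mask)
    {
        *condexec_cond = condexec_bits >> 4;
        *condexec_mask = (condexec_bits & 0xf) << 1;
    }

With both halves in place, a data abort in the middle of an IT block restarts with the correct condition and mask instead of the zeroed value written at the start of the TB.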