Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r--  test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-05-18-PostIndexBug.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-11-29-PrologueBug.ll | 4
-rw-r--r--  test/CodeGen/ARM/2010-12-07-PEIBug.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-06-16-TailCallByVal.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-08-25-ldmia_ret.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll | 10
-rw-r--r--  test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll | 55
-rw-r--r--  test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll | 105
-rw-r--r--  test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll | 67
-rw-r--r--  test/CodeGen/ARM/2012-01-26-CoalescerBug.ll | 21
-rw-r--r--  test/CodeGen/ARM/2012-01-26-CopyPropKills.ll | 121
-rw-r--r--  test/CodeGen/ARM/2012-02-01-CoalescerBug.ll | 26
-rw-r--r--  test/CodeGen/ARM/arm-returnaddr.ll | 8
-rw-r--r--  test/CodeGen/ARM/atomic-op.ll | 88
-rw-r--r--  test/CodeGen/ARM/avoid-cpsr-rmw.ll | 6
-rw-r--r--  test/CodeGen/ARM/call-tc.ll | 4
-rw-r--r--  test/CodeGen/ARM/code-placement.ll | 6
-rw-r--r--  test/CodeGen/ARM/cse-call.ll | 31
-rw-r--r--  test/CodeGen/ARM/cse-libcalls.ll | 4
-rw-r--r--  test/CodeGen/ARM/ctor_order.ll | 10
-rw-r--r--  test/CodeGen/ARM/debug-info-arg.ll | 2
-rw-r--r--  test/CodeGen/ARM/debug-info-blocks.ll | 2
-rw-r--r--  test/CodeGen/ARM/debug-info-sreg2.ll | 6
-rw-r--r--  test/CodeGen/ARM/dg.exp | 5
-rw-r--r--  test/CodeGen/ARM/ehabi-unwind.ll | 16
-rw-r--r--  test/CodeGen/ARM/fast-isel-binary.ll | 116
-rw-r--r--  test/CodeGen/ARM/fast-isel-br-const.ll | 4
-rw-r--r--  test/CodeGen/ARM/fast-isel-br-phi.ll | 44
-rw-r--r--  test/CodeGen/ARM/fast-isel-call.ll | 4
-rw-r--r--  test/CodeGen/ARM/fast-isel-cmp-imm.ll | 4
-rw-r--r--  test/CodeGen/ARM/fast-isel-conversion.ll | 150
-rw-r--r--  test/CodeGen/ARM/fast-isel-deadcode.ll | 5
-rw-r--r--  test/CodeGen/ARM/fast-isel-icmp.ll | 4
-rw-r--r--  test/CodeGen/ARM/fast-isel-indirectbr.ll | 17
-rw-r--r--  test/CodeGen/ARM/fast-isel-intrinsic.ll | 52
-rw-r--r--  test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll | 2
-rw-r--r--  test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll | 2
-rw-r--r--  test/CodeGen/ARM/fast-isel-mvn.ll | 6
-rw-r--r--  test/CodeGen/ARM/fast-isel-redefinition.ll | 2
-rw-r--r--  test/CodeGen/ARM/fast-isel-ret.ll | 13
-rw-r--r--  test/CodeGen/ARM/fast-isel-select.ll | 4
-rw-r--r--  test/CodeGen/ARM/fast-isel.ll | 26
-rw-r--r--  test/CodeGen/ARM/fcopysign.ll | 6
-rw-r--r--  test/CodeGen/ARM/fpcmp-opt.ll | 63
-rw-r--r--  test/CodeGen/ARM/fusedMAC.ll | 100
-rw-r--r--  test/CodeGen/ARM/hello.ll | 2
-rw-r--r--  test/CodeGen/ARM/ifcvt1.ll | 6
-rw-r--r--  test/CodeGen/ARM/ifcvt10.ll | 2
-rw-r--r--  test/CodeGen/ARM/ifcvt3.ll | 7
-rw-r--r--  test/CodeGen/ARM/ifcvt5.ll | 2
-rw-r--r--  test/CodeGen/ARM/ifcvt6.ll | 2
-rw-r--r--  test/CodeGen/ARM/insn-sched1.ll | 2
-rw-r--r--  test/CodeGen/ARM/ldrd.ll | 46
-rw-r--r--  test/CodeGen/ARM/lit.local.cfg | 13
-rw-r--r--  test/CodeGen/ARM/load_i1_select.ll | 19
-rw-r--r--  test/CodeGen/ARM/log2_not_readnone.ll | 15
-rw-r--r--  test/CodeGen/ARM/lsr-unfolded-offset.ll | 2
-rw-r--r--  test/CodeGen/ARM/machine-cse-cmp.ll | 31
-rw-r--r--  test/CodeGen/ARM/memfunc.ll | 3
-rw-r--r--  test/CodeGen/ARM/neon_spill.ll | 53
-rw-r--r--  test/CodeGen/ARM/odr_comdat.ll | 16
-rw-r--r--  test/CodeGen/ARM/rev.ll | 4
-rw-r--r--  test/CodeGen/ARM/select_xform.ll | 46
-rw-r--r--  test/CodeGen/ARM/shifter_operand.ll | 8
-rw-r--r--  test/CodeGen/ARM/spill-q.ll | 2
-rw-r--r--  test/CodeGen/ARM/tail-dup.ll | 44
-rw-r--r--  test/CodeGen/ARM/test-sharedidx.ll | 96
-rw-r--r--  test/CodeGen/ARM/vmov.ll | 17
-rw-r--r--  test/CodeGen/ARM/vst2.ll | 18
73 files changed, 1532 insertions, 161 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
; THUMB: t:
; THUMB-NOT: str r0, [r1], r0
-; THUMB: str r2, [r1]
+; THUMB: str r1, [r0]
%0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
store i32 0, i32* inttoptr (i32 8 to i32*), align 8
br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index a65cf4b..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=basic
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
; This test would crash the rewriter when trying to handle a spill after one of
; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
; CHECK: vld1.64 {d16, d17}, [r{{.}}]
; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
define i32 @test(i8* %arg) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
; rdar://8690640
define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..23e1aa1 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
; rdar://8728956
define hidden void @foo() nounwind ssp {
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
%struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
%struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
; Test that ldmia_ret preserves implicit operands for return values.
;
; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 86e8712..6fbae19 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
; CHECK: movw r1, :lower16:{{.*}}
; CHECK: movt r1, :upper16:{{.*}}
-; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
-; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]]
-; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]]
-; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]]
-; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]]
+; CHECK: vldmia r1
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vstmia {{.*}}
L.entry:
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH. In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+ %tmp = alloca i32, align 4
+ %tmp1 = alloca i32, align 4
+ %tmp2 = alloca i8*, align 4
+ %tmp3 = alloca i1
+ %myException = alloca %0*, align 4
+ %tmp4 = alloca i8*
+ %tmp5 = alloca i32
+ %exception = alloca %0*, align 4
+ store i32 %a, i32* %tmp, align 4
+ store i32 %b, i32* %tmp1, align 4
+ store i8* %d, i8** %tmp2, align 4
+ store i1 false, i1* %tmp3
+ %tmp7 = load i8** %c
+ %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+ to label %bb11 unwind label %bb15
+
+bb11: ; preds = %bb
+ store %0* %tmp10, %0** %myException, align 4
+ %tmp12 = load %0** %myException, align 4
+ %tmp13 = bitcast %0* %tmp12 to i8*
+ invoke void @objc_exception_throw(i8* %tmp13) noreturn
+ to label %bb14 unwind label %bb15
+
+bb14: ; preds = %bb11
+ unreachable
+
+bb15: ; preds = %bb11, %bb
+ %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ catch i8* null
+ %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+ store i8* %tmp17, i8** %tmp4
+ %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+ store i32 %tmp18, i32* %tmp5
+ store i1 true, i1* %tmp3
+ br label %bb56
+
+bb56:
+ unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+ br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ unreachable
+
+bb2: ; preds = %bb
+ br label %bb3
+
+bb3: ; preds = %bb4, %bb2
+ %tmp = icmp slt i32 undef, undef
+ br i1 %tmp, label %bb4, label %bb67
+
+bb4: ; preds = %bb3
+ %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+ %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+ %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+ %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+ %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+ %tmp10 = fmul <4 x float> undef, %tmp9
+ %tmp11 = fadd <4 x float> undef, %tmp10
+ %tmp12 = bitcast <4 x float> zeroinitializer to i128
+ %tmp13 = lshr i128 %tmp12, 64
+ %tmp14 = trunc i128 %tmp13 to i64
+ %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+ %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+ %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+ %tmp18 = fmul <4 x float> %tmp17, %tmp16
+ %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+ %tmp20 = fmul <4 x float> %tmp19, %tmp18
+ %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+ %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+ call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+ %tmp23 = bitcast <4 x float> %tmp22 to i128
+ %tmp24 = trunc i128 %tmp23 to i64
+ %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+ %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+ %tmp27 = load float* undef, align 4, !tbaa !2
+ %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+ %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+ %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+ %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+ %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+ %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+ %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+ %tmp35 = fmul <4 x float> %tmp34, undef
+ %tmp36 = fmul <4 x float> %tmp35, undef
+ %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp38 = load float* undef, align 4, !tbaa !2
+ %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+ %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp41 = load float* undef, align 4, !tbaa !2
+ %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+ %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp44 = fmul <4 x float> %tmp33, %tmp43
+ %tmp45 = fadd <4 x float> %tmp42, %tmp44
+ %tmp46 = fsub <4 x float> %tmp45, undef
+ %tmp47 = fmul <4 x float> %tmp46, %tmp36
+ %tmp48 = fadd <4 x float> undef, %tmp47
+ %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp50 = load float* undef, align 4, !tbaa !2
+ %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+ %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+ %tmp54 = load float* %tmp52, align 4, !tbaa !2
+ %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+ %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+ %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+ %tmp58 = fmul <4 x float> undef, %tmp57
+ %tmp59 = fsub <4 x float> %tmp51, %tmp48
+ %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+ %tmp61 = fmul <4 x float> %tmp59, %tmp60
+ %tmp62 = fadd <4 x float> %tmp48, %tmp61
+ call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+ %tmp63 = bitcast <4 x float> %tmp62 to i128
+ %tmp64 = lshr i128 %tmp63, 64
+ %tmp65 = trunc i128 %tmp64 to i64
+ %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+ call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+ br label %bb3
+
+bb67: ; preds = %bb3
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+ %tmp = load <2 x float>* undef, align 8, !tbaa !0
+ %tmp2 = extractelement <2 x float> %tmp, i32 0
+ %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+ %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+ %tmp7 = extractelement <2 x float> %tmp, i32 1
+ %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+ %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+ %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+ %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+ %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+ %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+ %tmp18 = extractelement <2 x float> %tmp17, i32 0
+ tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+ %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+ %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+ %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+ %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+ %tmp26 = extractelement <2 x float> %tmp25, i32 0
+ tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+ ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+ br i1 undef, label %for.end, label %cond.end295
+
+cond.end295: ; preds = %entry
+ %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+ %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+ %1 = bitcast <4 x float> undef to <2 x i64>
+ %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+ unreachable
+
+for.end: ; preds = %entry
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+ %3 = bitcast <4 x float> %2 to <2 x i64>
+ %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+ %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+ %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+ %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+ %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+ %9 = bitcast <2 x float> %7 to <1 x i64>
+ %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+ %11 = bitcast <2 x i64> %10 to <4 x float>
+ br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
+bb:
+ br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ unreachable
+
+bb2: ; preds = %bb
+ br i1 undef, label %bb92, label %bb3
+
+bb3: ; preds = %bb2
+ %tmp = or <4 x i32> undef, undef
+ %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+ %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+ %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+ %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+ %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+ %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+ %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+ %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+ %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+ %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+ %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+ %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+ %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+ %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+ %tmp22 = or <2 x i32> %tmp19, %tmp21
+ %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+ %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+ %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+ %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+ %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+ %tmp30 = or <2 x i32> %tmp28, %tmp29
+ %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+ %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+ %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+ %tmp34 = fadd <4 x float> %tmp33, %tmp32
+ %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+ %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+ %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+ %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+ %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+ %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+ %tmp43 = fmul <4 x float> %tmp42, %tmp41
+ %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+ %tmp45 = fadd <4 x float> undef, %tmp43
+ %tmp46 = fadd <4 x float> undef, %tmp45
+ %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+ %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+ %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp51 = fmul <4 x float> %tmp42, %tmp50
+ %tmp52 = fmul <4 x float> %tmp44, undef
+ %tmp53 = fadd <4 x float> %tmp52, %tmp51
+ %tmp54 = fadd <4 x float> undef, %tmp53
+ %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+ %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+ %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp59 = fmul <4 x float> undef, %tmp58
+ %tmp60 = fadd <4 x float> %tmp59, undef
+ %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+ %tmp62 = load void (i8*, i8*)** undef, align 4
+ call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind
+ %tmp63 = bitcast <4 x float> %tmp46 to i128
+ %tmp64 = bitcast <4 x float> %tmp54 to i128
+ %tmp65 = bitcast <4 x float> %tmp61 to i128
+ %tmp66 = lshr i128 %tmp63, 64
+ %tmp67 = trunc i128 %tmp66 to i64
+ %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+ %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+ %tmp70 = lshr i128 %tmp64, 64
+ %tmp71 = trunc i128 %tmp70 to i64
+ %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+ %tmp73 = trunc i128 %tmp65 to i64
+ %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+ %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+ %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+ %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+ call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
+ %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind
+ %tmp79 = bitcast i8* %tmp78 to i512*
+ %tmp80 = load i512* %tmp79, align 16
+ %tmp81 = lshr i512 %tmp80, 128
+ %tmp82 = trunc i512 %tmp80 to i128
+ %tmp83 = trunc i512 %tmp81 to i128
+ %tmp84 = bitcast i128 %tmp83 to <4 x float>
+ %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+ %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+ %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp89 = fmul <4 x float> undef, %tmp88
+ %tmp90 = fadd <4 x float> %tmp89, undef
+ %tmp91 = fadd <4 x float> undef, %tmp90
+ store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+ unreachable
+
+bb92: ; preds = %bb2
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 0000000..6c7aaad
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
+ %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = extractelement <4 x float> %4, i32 1
+ %7 = extractelement <4 x float> %4, i32 2
+ %8 = insertelement <4 x float> undef, float %5, i32 0
+ %9 = insertelement <4 x float> %8, float %6, i32 1
+ %10 = insertelement <4 x float> %9, float %7, i32 2
+ %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
+ store <4 x float> %11, <4 x float>* undef, align 16
+ call arm_aapcs_vfpcc void @baz(%1* undef, float 0.000000e+00) nounwind
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad..1272e8e 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
; rdar://8015977
; rdar://8020118
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a1..8967730 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
define void @func(i32 %argc, i8** %argv) nounwind {
entry:
@@ -61,7 +61,7 @@ entry:
; CHECK: strex
%7 = atomicrmw min i32* %val2, i32 16 monotonic
store i32 %7, i32* %old
- %neg = sub i32 0, 1 ; <i32> [#uses=1]
+ %neg = sub i32 0, 1
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
@@ -77,5 +77,85 @@ entry:
; CHECK: strex
%10 = atomicrmw max i32* %val2, i32 0 monotonic
store i32 %10, i32* %old
- ret void
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+ store i32 %11, i32* %old
+ %uneg = sub i32 0, 1
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+ store i32 %12, i32* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+ store i32 %13, i32* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+ store i32 %14, i32* %old
+
+ ret void
+}
+
+define void @func2() nounwind {
+entry:
+ %val = alloca i16
+ %old = alloca i16
+ store i16 31, i16* %val
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %0 = atomicrmw umin i16* %val, i16 16 monotonic
+ store i16 %0, i16* %old
+ %uneg = sub i16 0, 1
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+ store i16 %1, i16* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %2 = atomicrmw umax i16* %val, i16 1 monotonic
+ store i16 %2, i16* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %3 = atomicrmw umax i16* %val, i16 0 monotonic
+ store i16 %3, i16* %old
+ ret void
+}
+
+define void @func3() nounwind {
+entry:
+ %val = alloca i8
+ %old = alloca i8
+ store i8 31, i8* %val
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %0 = atomicrmw umin i8* %val, i8 16 monotonic
+ store i8 %0, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %uneg = sub i8 0, 1
+ %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+ store i8 %1, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %2 = atomicrmw umax i8* %val, i8 1 monotonic
+ store i8 %2, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %3 = atomicrmw umax i8* %val, i8 0 monotonic
+ store i8 %3, i8* %old
+ ret void
}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 877ec18..1b385ab 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -6,9 +6,9 @@
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
entry:
; CHECK: t1:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
%0 = mul nsw i32 %a, %b
%1 = mul nsw i32 %c, %d
%2 = mul nsw i32 %0, %1
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d998..be3e105 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
; Enable tailcall optimization for iOS 5.0
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 0000000..eff5de5
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp {
+entry:
+ br i1 undef, label %while.end42, label %while.body37
+
+while.body37: ; preds = %while.body37, %entry
+ br i1 false, label %while.end42, label %while.body37
+
+while.end42: ; preds = %while.body37, %entry
+ %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+ %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+ tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
+ %call47 = tail call i32 @strlen(i8* %.) nounwind
+ unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9dd..1d011be 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
; Without CSE of libcalls, there are two calls in the output instead of one.
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
entry:
%tmp19.i.i = load double* null, align 4 ; <double> [#uses=2]
%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i: ; preds = %bb28.i
br i1 false, label %bb.nph53.i, label %bb508.i
bb508.i: ; preds = %bb502.loopexit.i, %entry
- ret i32 1
+ ret double %tmp10.i4
}
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
index 7f00eb3..6419292 100644
--- a/test/CodeGen/ARM/ctor_order.ll
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -6,13 +6,15 @@
; DARWIN: .long _f151
; DARWIN-NEXT: .long _f152
-; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .ctors.65384,"aw",%progbits
+; ELF: .long f151
+; ELF: .section .ctors.65383,"aw",%progbits
; ELF: .long f152
-; ELF-NEXT: .long f151
-; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .init_array.151,"aw",%init_array
; GNUEABI: .long f151
-; GNUEABI-NEXT: .long f152
+; GNUEABI: .section .init_array.152,"aw",%init_array
+; GNUEABI: .long f152
@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9..a7b44e6 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
; Test to check argument y's debug info uses FI
; Radar 10048772
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%struct.tag_s = type { i32, i32, i32 }
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 00e6cb0..0ad0a15 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -2,7 +2,7 @@
; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
; Radar 9331779
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%0 = type opaque
%1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 3972e68..ae7af0a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: Ldebug_loc0:
+;CHECK-NEXT: .long Ltmp0
;CHECK-NEXT: .long Ltmp1
-;CHECK-NEXT: .long Ltmp2
-;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp10-Ltmp9 @ Loc expr size
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size
;CHECK-NEXT: .short Lset[[N]]
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Ltmp[[M]]:
;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register
define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 0000000..fd7d0e6
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind {
+entry:
+ ret void
+}
+
+define void @_Z1gv() nounwind {
+entry:
+ call void @_Z1fv()
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 0000000..723383e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+ %a.addr = alloca i1, align 4
+ %0 = add i1 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+ %a.addr = alloca i8, align 4
+ %0 = add i8 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+ %a.addr = alloca i16, align 4
+ %0 = add i16 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+ %a.addr = alloca i1, align 4
+ %0 = or i1 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+ %a.addr = alloca i8, align 4
+ %0 = or i8 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+ %a.addr = alloca i16, align 4
+ %0 = or i16 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+ %a.addr = alloca i1, align 4
+ %0 = sub i1 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+ %a.addr = alloca i8, align 4
+ %0 = sub i8 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+ %a.addr = alloca i16, align 4
+ %0 = sub i16 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index b7acfaa..625adc2 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 0000000..a0aba69
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+ br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8: ; preds = %cond.end
+ br label %cond.end8
+
+cond.false8: ; preds = %cond.end
+ br label %cond.end8
+
+cond.end8: ; preds = %cond.false8, %cond.true8
+ %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+ call void @fooi8(i8 %cond8)
+ br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16: ; preds = %cond.end8
+ br label %cond.end16
+
+cond.false16: ; preds = %cond.end8
+ br label %cond.end16
+
+cond.end16: ; preds = %cond.false16, %cond.true16
+ %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+ call void @fooi16(i16 %cond16)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 695dbba..dd460b2 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t0(i1 zeroext %a) nounwind {
%1 = zext i1 %a to i32
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 33c6008..1693066 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define void @t1a(float %a) uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 14666a8..686ccad 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Test sitofp
@@ -94,3 +94,149 @@ entry:
store double %conv, double* %b.addr, align 8
ret void
}
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i32 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i16 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i8 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i32 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i16 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i8 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+ %b.addr = alloca i32, align 4
+ %conv = fptosi float %a to i32
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+ %b.addr = alloca i32, align 8
+ %conv = fptosi double %a to i32
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+ %b.addr = alloca i32, align 4
+ %conv = fptoui float %a to i32
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+ %b.addr = alloca i32, align 8
+ %conv = fptoui double %a to i32
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 028d940..7e147c7 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Target-specific selector can't properly handle the double because it isn't
; being passed via a register, so the materialized arguments become dead code.
@@ -15,8 +15,7 @@ entry:
; THUMB-NOT: sxtb
; THUMB: movs r0, #0
; THUMB: movt r0, #0
-; THUMB: add sp, #32
-; THUMb: pop {r7, pc}
+; THUMB: pop
ret i32 0
}
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index deffe7b..8764bef 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 0000000..be8035e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+ br label %L0
+
+L0:
+ br label %L1
+
+L1:
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 3ef8bce..e6bdfa7 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1
define void @t1() nounwind ssp {
; ARM: t1
-; ARM: ldr r0, LCPI0_0
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
; ARM: add r0, r0, #5
; ARM: movw r1, #64
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl _memset
; THUMB: t1
-; THUMB: ldr.n r0, LCPI0_0
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
; THUMB: adds r0, #5
; THUMB: movs r1, #64
; THUMB: movt r1, #0
@@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @t2() nounwind ssp {
; ARM: t2
-; ARM: ldr r0, LCPI1_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
@@ -39,7 +42,8 @@ define void @t2() nounwind ssp {
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl _memcpy
; THUMB: t2
-; THUMB: ldr.n r0, LCPI1_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
@@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
define void @t3() nounwind ssp {
; ARM: t3
-; ARM: ldr r0, LCPI2_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
@@ -63,7 +68,8 @@ define void @t3() nounwind ssp {
; ARM: mov r0, r1
; ARM: bl _memmove
; THUMB: t3
-; THUMB: ldr.n r0, LCPI2_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
@@ -77,26 +83,24 @@ define void @t3() nounwind ssp {
define void @t4() nounwind ssp {
; ARM: t4
-; ARM: ldr r0, LCPI3_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
; ARM: ldr r0, [r0]
-; ARM: ldr r1, LCPI3_1
-; ARM: ldr r1, [r1]
-; ARM: ldr r2, [r1, #16]
-; ARM: str r2, [r0, #4]
-; ARM: ldr r2, [r1, #20]
-; ARM: str r2, [r0, #8]
-; ARM: ldrh r1, [r1, #24]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
; ARM: strh r1, [r0, #12]
; ARM: bx lr
-; THUMB: ldr.n r0, LCPI3_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
-; THUMB: ldr.n r1, LCPI3_1
-; THUMB: ldr r1, [r1]
-; THUMB: ldr r2, [r1, #16]
-; THUMB: str r2, [r0, #4]
-; THUMB: ldr r2, [r1, #20]
-; THUMB: str r2, [r0, #8]
-; THUMB: ldrh r1, [r1, #24]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index 0b8a768..2a88678 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32* nocapture %ptr) nounwind readonly {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index dcfc9d0..e8cc2b2 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; rdar://10418009
define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index daf56e7..b180e43 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; rdar://10412592
; Note: The Thumb code is being generated by the target-independent selector.
@@ -104,4 +104,4 @@ entry:
; THUMB: movt r0, #33023
call void @foo(i32 -2130706433)
ret void
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 4203537..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=basic < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It was crashing because a register was getting redefined.)
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index f7f4521..689b169 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
; Sign-extend of i1 currently not supported by fast-isel
;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
@@ -46,3 +46,12 @@ entry:
; CHECK: bx lr
ret i16 %a
}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+ ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index 9ac63d6..b83a733 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i1 %c) nounwind readnone {
entry:
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 648d711..905543a 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Very basic fast-isel functionality.
define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,21 +142,19 @@ define void @test4() {
store i32 %b, i32* @test4g
ret void
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
-; ARM: ldr r0, LCPI4_1
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
; ARM: ldr r0, [r0]
-; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
}
; Check unaligned stores
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9..87115cc 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
; rdar://8984306
define float @test1(float %x, float %y) nounwind {
@@ -60,8 +60,8 @@ entry:
define float @test5() nounwind {
entry:
; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
; SOFT: vbsl [[REG6]], [[REG7]],
%0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index ad03202..80925c7 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
; rdar://7461510
+; rdar://10964603
+; Disable this optimization unless we know one of the operands is zero.
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr s0,
-; NAN: vldr s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: beq
%0 = load float* %a
%1 = load float* %b
%2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
ret i32 %4
}
+; If one side is zero, the other side's sign bit is masked off to allow
+; +0.0 == -0.0
define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
%0 = load double* %a
%1 = fcmp oeq double %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
%0 = load float* %a
%1 = fcmp oeq float %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
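
A minimal standalone reduction of the float-against-zero case that @t3 exercises, kept here purely as an illustration (the @t3_sketch name and block bodies are made up; the same RUN line as this file is assumed):

define arm_apcscc i32 @t3_sketch(float* %a) nounwind {
entry:
  ; Per the checks above, the expected lowering loads the raw word and
  ; tests it against 0x7fffffff (mvn/tst) rather than using vcmpe.f32/vmrs.
  %0 = load float* %a
  %1 = fcmp oeq float %0, 0.000000e+00
  br i1 %1, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  ret i32 1

bb2:                                              ; preds = %entry
  ret i32 0
}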
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 0000000..40e8bb2
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+ %1 = fmul double %d1, %d2
+ %2 = fadd double %1, %d3
+ ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+ %1 = fmul float %f1, %f2
+ %2 = fadd float %1, %f3
+ ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+ %1 = fmul double %d2, %d3
+ %2 = fsub double %d1, %1
+ ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+ %1 = fmul float %f2, %f3
+ %2 = fsub float %f1, %1
+ ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+ %1 = fmul double %d1, %d2
+ %2 = fsub double -0.0, %1
+ %3 = fsub double %2, %d3
+ ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+ %1 = fmul float %f1, %f2
+ %2 = fsub float -0.0, %1
+ %3 = fsub float %2, %f3
+ ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+ %1 = fmul double %d1, %d2
+ %2 = fsub double %1, %d3
+ ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+ %1 = fmul float %f1, %f2
+ %2 = fsub float %1, %f3
+ ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+ %mul = fmul <2 x float> %a, %b
+ %add = fadd <2 x float> %mul, %a
+ ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+ %mul = fmul <2 x float> %a, %b
+ %sub = fsub <2 x float> %a, %mul
+ ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+ %mul = fmul <4 x float> %a, %b
+ %add = fadd <4 x float> %mul, %a
+ ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+ %mul = fmul <4 x float> %a, %b
+ %sub = fsub <4 x float> %a, %mul
+ ret <4 x float> %sub
+}
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae0..893b426 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05..cd870bb 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
cond_true:
+; CHECK: subeq r0, r1, #1
%tmp5 = add i32 %b, 1
ret i32 %tmp5
cond_false:
+; CHECK: addne r0, r1, #1
%tmp7 = add i32 %b, -1
ret i32 %tmp7
}
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bf..a5082d8 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
; rdar://8402126
; Make sure if-converter is not predicating vldmia and ldmia. These are
; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578..eef4de0 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true
]
cond_true:
+; CHECK: addne r0
+; CHECK: bxne
%tmp12 = add i32 %a, 1
%tmp1518 = add i32 %tmp12, %b
ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055..95f5c97 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
@x = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 2327657..a00deda 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
define void @foo(i32 %X, i32 %Y) {
entry:
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d32322..d188fae 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
; RUN: grep mov | count 3
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index d72e9bf..a588bc3 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
; Magic ARM pair hints work best with linearscan / fast.
@@ -23,3 +25,47 @@ entry:
%2 = mul i64 %1, %a
ret i64 %2
}
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generates a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability, whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather than
+; evict another live range or use callee saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldr
+; GREEDY: ldr
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+ %0 = add nsw i32 %n, -1 ; <i32> [#uses=2]
+ %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb, label %return
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1]
+ %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1]
+ %tmp = add i32 %i.03, 1 ; <i32> [#uses=3]
+ %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1]
+ %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1]
+ %4 = add nsw i32 %3, %2 ; <i32> [#uses=1]
+ store i32 %4, i32* %scevgep4, align 4
+ %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
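
The hinting discussion above boils down to whether two word loads from consecutive addresses land in an even/odd register pair. A minimal non-loop reduction of that adjacent-load shape, given only as an illustration (the @ldrd_sketch name is made up and the cortex-a8 RUN lines above are assumed; whether an ldrd is actually formed still depends on the allocator, as the comment explains):

define i32 @ldrd_sketch(i32* %p) nounwind {
entry:
  %lo.addr = getelementptr i32* %p, i32 0
  %hi.addr = getelementptr i32* %p, i32 1
  %lo = load i32* %lo.addr, align 4   ; word at offset 0
  %hi = load i32* %hi.addr, align 4   ; word at offset 4; together a candidate for one ldrd
  %sum = add nsw i32 %lo, %hi
  ret i32 %sum
}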
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 0000000..bdd4081
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+ %tmp2 = load i8* %call
+ %tmp3 = trunc i8 %tmp2 to i1
+ %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+ store double %cond, double* %p
+ ret void
+}
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 0000000..8068abd
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+ ; CHECK: bl log2
+ %1 = call double @log2(double 0.000000e+00)
+ ; CHECK: bl exp2
+ %2 = call double @exp2(double 0.000000e+00)
+ ret void
+}
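
For contrast, the conversion that the comment above rules out would only be legal with readnone declarations. A hypothetical separate module sketching that variant (not part of this test; the @g name is made up), where the call may become the llvm.log2.f64 intrinsic precisely because no errno side effect remains:

declare double @log2(double) nounwind readnone

define double @g(double %x) nounwind {
  ; With readnone there is no errno side effect, so this call is a legal
  ; candidate for conversion to the intrinsic, and a "bl log2" would not
  ; be guaranteed to survive in the output.
  %y = call double @log2(double %x)
  ret double %y
}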
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a96..5b4cf9d 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
; CHECK: add
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%struct.partition_entry = type { i32, i32, i64, i64 }
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f..f566974 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
;rdar://8003725
@G1 = external global i32
@@ -6,6 +6,7 @@
define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
entry:
+; CHECK: f1:
; CHECK: cmp
; CHECK: moveq
; CHECK-NOT: cmp
@@ -16,3 +17,31 @@ entry:
%tmp4 = add i32 %tmp2, %tmp3
ret i32 %tmp4
}
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+ %0 = load i32* @foo, align 4
+ %cmp28 = icmp sgt i32 %0, 0
+ br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph: ; preds = %entry
+ %1 = icmp sgt i32 %0, 1
+ %smax = select i1 %1, i32 %0, i32 1
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+ unreachable
+
+for.cond1.preheader: ; preds = %entry
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
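
In @f2 above the two IR compares use different constants, yet the checks require only a single cmp in the generated code. A simpler shape of the same cross-block compare reuse, shown only as an illustration (the @f2_sketch name is made up and the armv7-apple-ios RUN line above is assumed); here the two compares are already textually identical, so the duplicate cmp should be removable when this is fed straight to llc:

define i32 @f2_sketch(i32 %x) nounwind {
entry:
  %cmp1 = icmp sgt i32 %x, 0
  br i1 %cmp1, label %bb, label %done

bb:                                               ; preds = %entry
  ; Same predicate and operands as in %entry, so one cmp should be able
  ; to feed both the branch above and this select.
  %cmp2 = icmp sgt i32 %x, 0
  %v = select i1 %cmp2, i32 1, i32 2
  ret i32 %v

done:                                             ; preds = %entry
  ret i32 0
}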
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda022..fe0056c 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
@from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
; EABI memset swaps arguments
; CHECK: mov r1, #0
; CHECK: memset
+ ; DARWIN: movs r1, #0
+ ; DARWIN: memset
; EABI: mov r2, #0
; EABI: __aeabi_memset
call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 0000000..677b9c2
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+ call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ %2 = call arm_aapcs_vfpcc %0** @func2() nounwind
+ %3 = load %0** %2, align 4, !tbaa !0
+ store float 0.000000e+00, float* undef, align 4
+ %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+ call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef)
+ %5 = call arm_aapcs_vfpcc %0** @func2() nounwind
+ store float 1.000000e+00, float* undef, align 4
+ call arm_aapcs_vfpcc void @func1(%0* undef, float* undef, float* undef, %2* undef)
+ store float 1.500000e+01, float* undef, align 4
+ %6 = call arm_aapcs_vfpcc %2** @func4() nounwind
+ %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+ %8 = fadd float undef, -1.000000e+05
+ store float %8, float* undef, align 16, !tbaa !3
+ %9 = call arm_aapcs_vfpcc i32 @rand() nounwind
+ %10 = fmul float undef, 2.000000e+05
+ %11 = fadd float %10, -1.000000e+05
+ store float %11, float* undef, align 4, !tbaa !3
+ call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 0000000..e28b578
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Checking that a comdat group gets generated correctly for a static member
+; of an instantiated C++ template.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled
+; name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28..6bb6743 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
ret i32 %conv3
}
+; rdar://10750814
define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
entry:
; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
%conv = zext i16 %v to i32
%shr4 = lshr i32 %conv, 8
%shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133a..3a66ec5 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
%s = or i32 %z, %y
ret i32 %s
}
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq.w r2, r2, #1
+ %tmp1 = icmp eq i32 %a, %b
+ %tmp2 = zext i1 %tmp1 to i32
+ %tmp3 = or i32 %tmp2, %c
+ ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+ %cond = icmp slt i32 %a, %b
+ %tmp1 = select i1 %cond, i32 %c, i32 0
+ %tmp2 = xor i32 %tmp1, %d
+ ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+ %tmp1 = shl i32 %c, 1
+ %cond = icmp eq i32 %a, %b
+ %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+ %tmp3 = and i32 %c, %tmp2
+ ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef0..521ffa1 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -54,12 +54,12 @@ declare i8* @malloc(...)
define fastcc void @test4(i16 %addr) nounwind {
entry:
; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8: str [[REG]], [r0, r1, lsl #2]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0]
; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9: str [[REG]], [r0, r1, lsl #2]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55c..057ea11 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
; CHECK: vst1.64 {{.*}}sp, :128
; CHECK: vld1.64 {{.*}}sp, :128
entry:
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 0000000..e015bf0
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+ %0 = load i32* %opcodes, align 4, !tbaa !0
+ %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+ br label %indirectgoto
+
+INCREMENT: ; preds = %indirectgoto
+ %inc = add nsw i32 %result.0, 1
+ %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+ br label %indirectgoto
+
+DECREMENT: ; preds = %indirectgoto
+ %dec = add nsw i32 %result.0, -1
+ %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+ %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+ br label %indirectgoto
+
+indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry
+ %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+ %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+ %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+ %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+ %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+ indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN: ; preds = %indirectgoto
+ ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 0000000..93340c3
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+; for (unsigned long i = 0; i < len; i += s) {
+; c[i] = a[i] + b[i];
+; }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+ %cmp8 = icmp eq i32 %len, 0
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+ %0 = load i8* %arrayidx, align 1
+ %conv6 = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+ %1 = load i8* %arrayidx1, align 1
+ %conv27 = zext i8 %1 to i32
+ %add = add nsw i32 %conv27, %conv6
+ %conv3 = trunc i32 %add to i8
+ %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+ store i8 %conv3, i8* %arrayidx4, align 1
+ %add5 = add i32 %i.09, %s
+ %cmp = icmp ult i32 %add5, %len
+ br i1 %cmp, label %for.body.1, label %for.end
+
+for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+ ret void
+
+for.body.1: ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+ %2 = load i8* %arrayidx.1, align 1
+ %conv6.1 = zext i8 %2 to i32
+ %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+ %3 = load i8* %arrayidx1.1, align 1
+ %conv27.1 = zext i8 %3 to i32
+ %add.1 = add nsw i32 %conv27.1, %conv6.1
+ %conv3.1 = trunc i32 %add.1 to i8
+ %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+ store i8 %conv3.1, i8* %arrayidx4.1, align 1
+ %add5.1 = add i32 %add5, %s
+ %cmp.1 = icmp ult i32 %add5.1, %len
+ br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2: ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+ %4 = load i8* %arrayidx.2, align 1
+ %conv6.2 = zext i8 %4 to i32
+ %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+ %5 = load i8* %arrayidx1.2, align 1
+ %conv27.2 = zext i8 %5 to i32
+ %add.2 = add nsw i32 %conv27.2, %conv6.2
+ %conv3.2 = trunc i32 %add.2 to i8
+ %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+ store i8 %conv3.2, i8* %arrayidx4.2, align 1
+ %add5.2 = add i32 %add5.1, %s
+ %cmp.2 = icmp ult i32 %add5.2, %len
+ br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3: ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+ %6 = load i8* %arrayidx.3, align 1
+ %conv6.3 = zext i8 %6 to i32
+ %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+ %7 = load i8* %arrayidx1.3, align 1
+ %conv27.3 = zext i8 %7 to i32
+ %add.3 = add nsw i32 %conv27.3, %conv6.3
+ %conv3.3 = trunc i32 %add.3 to i8
+ %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+ store i8 %conv3.3, i8* %arrayidx4.3, align 1
+ %add5.3 = add i32 %add5.2, %s
+ %cmp.3 = icmp ult i32 %add5.3, %len
+ br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index be95657..0c23879 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -381,3 +381,20 @@ entry:
store <4 x float> %b, <4 x float> *%p
ret void
}
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+ %and.i186 = zext <4 x i1> %x to <4 x i32>
+ %add.i185 = sub <4 x i32> %and.i186, %y
+ %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+ %add.i = add <4 x i32> %sub.i, zeroinitializer
+ %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+ tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+ unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
ret void
}
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+ %tmp1 = load <4 x i16>* %B
+ tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+ %t5 = getelementptr inbounds i8* %out, i32 16
+ ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+ %tmp1 = load <4 x float>* %this
+ call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+ %tmp2 = getelementptr inbounds i8* %out, i32 32
+ ret i8* %tmp2
+}
+
declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind