diff options
Diffstat (limited to 'test/CodeGen/ARM')
73 files changed, 1532 insertions, 161 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 3694aaa..0bfe331 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index df9dbca..0ae7f84 100644 --- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -11,7 +11,7 @@ entry: ; THUMB: t: ; THUMB-NOT: str r0, [r1], r0 -; THUMB: str r2, [r1] +; THUMB: str r1, [r0] %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1] store i32 0, i32* inttoptr (i32 8 to i32*), align 8 br i1 undef, label %bb.nph96, label %bb3 diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index a65cf4b..e0f50c9 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=basic +; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic ; This test would crash the rewriter when trying to handle a spill after one of ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index b9d5600..1aee508 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10" ; CHECK: vld1.64 {d16, d17}, [r{{.}}] ; CHECK-NOT: vld1.64 {d16, d17} -; CHECK: vmov.f64 d19, d16 +; CHECK: vmov.f64 define i32 @test(i8* %arg) nounwind { entry: diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll index e3c18ce..da4d157 100644 --- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2 +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2 ; rdar://8690640 define i32* @t(i32* %x) nounwind { diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll index c65952b..23e1aa1 100644 --- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll +++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s ; rdar://8728956 define hidden void @foo() nounwind ssp { diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll index ccda281..2faa04a 100644 --- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | 
FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. ; rdar://9133587 diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll index 7baacfe..3e78c46 100644 --- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll +++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" +target triple = "thumbv7-apple-ios" %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }> %struct.B = type <{ i32, i16, i16 }> diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll index 17264ee..216057a 100644 --- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll +++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s ; Test that ldmia_ret preserves implicit operands for return values. ; ; This CFG is reduced from a benchmark miscompile. With current diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 86e8712..6fbae19 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] +; CHECK: vldmia r1 +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vstmia {{.*}} L.entry: diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll new file mode 100644 index 0000000..ddb7632 --- /dev/null +++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s + +; Radar 10567930: Make sure that all the caller-saved registers are saved and +; restored in a function with setjmp/longjmp EH. In particular, r6 was not +; being saved here. 
+; CHECK: push {r4, r5, r6, r7, lr} + +%0 = type opaque +%struct.NSConstantString = type { i32*, i32, i8*, i32 } + +define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) { +bb: + %tmp = alloca i32, align 4 + %tmp1 = alloca i32, align 4 + %tmp2 = alloca i8*, align 4 + %tmp3 = alloca i1 + %myException = alloca %0*, align 4 + %tmp4 = alloca i8* + %tmp5 = alloca i32 + %exception = alloca %0*, align 4 + store i32 %a, i32* %tmp, align 4 + store i32 %b, i32* %tmp1, align 4 + store i8* %d, i8** %tmp2, align 4 + store i1 false, i1* %tmp3 + %tmp7 = load i8** %c + %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null) + to label %bb11 unwind label %bb15 + +bb11: ; preds = %bb + store %0* %tmp10, %0** %myException, align 4 + %tmp12 = load %0** %myException, align 4 + %tmp13 = bitcast %0* %tmp12 to i8* + invoke void @objc_exception_throw(i8* %tmp13) noreturn + to label %bb14 unwind label %bb15 + +bb14: ; preds = %bb11 + unreachable + +bb15: ; preds = %bb11, %bb + %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + catch i8* null + %tmp17 = extractvalue { i8*, i32 } %tmp16, 0 + store i8* %tmp17, i8** %tmp4 + %tmp18 = extractvalue { i8*, i32 } %tmp16, 1 + store i32 %tmp18, i32* %tmp5 + store i1 true, i1* %tmp3 + br label %bb56 + +bb56: + unreachable +} + +declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind +declare i32 @__objc_personality_v0(...) +declare void @objc_exception_throw(i8*) diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll new file mode 100644 index 0000000..926daaf --- /dev/null +++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb4, %bb2 + %tmp = icmp slt i32 undef, undef + br i1 %tmp, label %bb4, label %bb67 + +bb4: ; preds = %bb3 + %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> + %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> + %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> + %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>) + %tmp10 = fmul <4 x float> undef, %tmp9 + %tmp11 = fadd <4 x float> undef, %tmp10 + %tmp12 = bitcast <4 x float> zeroinitializer to i128 + %tmp13 = lshr i128 %tmp12, 64 + %tmp14 = trunc i128 %tmp13 to i64 + %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1 + %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind + %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind + %tmp18 = fmul <4 x float> %tmp17, %tmp16 + %tmp19 = call <4 x float> 
@llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind + %tmp20 = fmul <4 x float> %tmp19, %tmp18 + %tmp21 = fmul <4 x float> %tmp20, zeroinitializer + %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind + call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind + %tmp23 = bitcast <4 x float> %tmp22 to i128 + %tmp24 = trunc i128 %tmp23 to i64 + %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 + %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 + %tmp27 = load float* undef, align 4, !tbaa !2 + %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 + %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> + %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> + %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> + %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>) + %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind + %tmp35 = fmul <4 x float> %tmp34, undef + %tmp36 = fmul <4 x float> %tmp35, undef + %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp38 = load float* undef, align 4, !tbaa !2 + %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 + %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp41 = load float* undef, align 4, !tbaa !2 + %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3 + %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer + %tmp44 = fmul <4 x float> %tmp33, %tmp43 + %tmp45 = fadd <4 x float> %tmp42, %tmp44 + %tmp46 = fsub <4 x float> %tmp45, undef + %tmp47 = fmul <4 x float> %tmp46, %tmp36 + %tmp48 = fadd <4 x float> undef, %tmp47 + %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind + %tmp50 = load float* undef, align 4, !tbaa !2 + %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 + %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind + %tmp54 = load float* %tmp52, align 4, !tbaa !2 + %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 + %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22 + %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind + %tmp58 = fmul <4 x float> undef, %tmp57 + %tmp59 = fsub <4 x float> %tmp51, %tmp48 + %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58 + %tmp61 = fmul <4 x float> %tmp59, %tmp60 + %tmp62 = fadd <4 x float> %tmp48, %tmp61 + call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef) + %tmp63 = bitcast <4 x float> %tmp62 to i128 + %tmp64 = lshr i128 %tmp63, 64 + %tmp65 = trunc i128 %tmp64 to i64 + %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1 + call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef) + br label %bb3 + 
+bb67: ; preds = %bb3 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64]) + +declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2 + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} +!2 = metadata !{metadata !"float", metadata !0} diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll new file mode 100644 index 0000000..872eca3 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -0,0 +1,67 @@ +; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing +; PR11841 +; PR11829 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE. +define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { +bb: + %tmp = load <2 x float>* undef, align 8, !tbaa !0 + %tmp2 = extractelement <2 x float> %tmp, i32 0 + %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0 + %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1 + %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2 + %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 + %tmp7 = extractelement <2 x float> %tmp, i32 1 + %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1 + %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2 + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3 + %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64> + %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float> + %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer + %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64> + %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float> + %tmp18 = extractelement <2 x float> %tmp17, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind + %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64> + %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float> + %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64> + %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float> + %tmp26 = extractelement <2 x float> %tmp25, i32 0 + tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind + ret void +} + +define arm_aapcs_vfpcc void @foo2() nounwind uwtable { +entry: + br i1 undef, label %for.end, label 
%cond.end295 + +cond.end295: ; preds = %entry + %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1> + %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float> + %1 = bitcast <4 x float> undef to <2 x i64> + %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1> + %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + unreachable + +for.end: ; preds = %entry + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll new file mode 100644 index 0000000..ec5b2e9 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -verify-coalescing +; PR11861 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-eabi" + +define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 { + br label %1 + +; <label>:1 ; preds = %1, %0 + %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ] + %3 = bitcast <4 x float> %2 to <2 x i64> + %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer + %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1> + %6 = bitcast <2 x i32> zeroinitializer to <2 x float> + %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2> + %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1> + %9 = bitcast <2 x float> %7 to <1 x i64> + %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1> + %11 = bitcast <2 x i64> %10 to <4 x float> + br label %1 +} diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll new file mode 100644 index 0000000..5f24e42 --- /dev/null +++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs +; PR11765 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +; This test case exercises the MachineCopyPropagation pass by disabling the +; RegisterCoalescer. 
+ +define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 { +bb: + br i1 undef, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + br i1 undef, label %bb92, label %bb3 + +bb3: ; preds = %bb2 + %tmp = or <4 x i32> undef, undef + %tmp4 = bitcast <4 x i32> %tmp to <4 x float> + %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4 + %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float> + %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64> + %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float> + %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1> + %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float> + %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2> + %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2> + %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64> + %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64> + %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32> + %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0> + %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32> + %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1> + %tmp22 = or <2 x i32> %tmp19, %tmp21 + %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64> + %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float> + %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1> + %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32> + %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0> + %tmp29 = and <2 x i32> undef, <i32 0, i32 -1> + %tmp30 = or <2 x i32> %tmp28, %tmp29 + %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64> + %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3 + %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00> + %tmp34 = fadd <4 x float> %tmp33, %tmp32 + %tmp35 = fmul <4 x float> %tmp33, zeroinitializer + %tmp36 = fadd <4 x float> %tmp35, zeroinitializer + %tmp37 = fadd <4 x float> %tmp35, zeroinitializer + %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64> + %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> + %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp42 = load <4 x float>* null, align 16, !tbaa !0 + %tmp43 = fmul <4 x float> %tmp42, %tmp41 + %tmp44 = load <4 x float>* undef, align 16, !tbaa !0 + %tmp45 = fadd <4 x float> undef, %tmp43 + %tmp46 = fadd <4 x float> undef, %tmp45 + %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> + %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer + %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float> + %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp51 = fmul <4 x float> %tmp42, %tmp50 + %tmp52 = fmul <4 x float> %tmp44, undef + %tmp53 = fadd <4 x float> %tmp52, %tmp51 + %tmp54 = fadd <4 x float> undef, %tmp53 + %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64> + %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1> + %tmp57 = bitcast 
<1 x i64> %tmp56 to <2 x float> + %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer + %tmp59 = fmul <4 x float> undef, %tmp58 + %tmp60 = fadd <4 x float> %tmp59, undef + %tmp61 = fadd <4 x float> %tmp60, zeroinitializer + %tmp62 = load void (i8*, i8*)** undef, align 4 + call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind + %tmp63 = bitcast <4 x float> %tmp46 to i128 + %tmp64 = bitcast <4 x float> %tmp54 to i128 + %tmp65 = bitcast <4 x float> %tmp61 to i128 + %tmp66 = lshr i128 %tmp63, 64 + %tmp67 = trunc i128 %tmp66 to i64 + %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1 + %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2 + %tmp70 = lshr i128 %tmp64, 64 + %tmp71 = trunc i128 %tmp70 to i64 + %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3 + %tmp73 = trunc i128 %tmp65 to i64 + %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4 + %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5 + %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6 + %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7 + call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind + %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind + %tmp79 = bitcast i8* %tmp78 to i512* + %tmp80 = load i512* %tmp79, align 16 + %tmp81 = lshr i512 %tmp80, 128 + %tmp82 = trunc i512 %tmp80 to i128 + %tmp83 = trunc i512 %tmp81 to i128 + %tmp84 = bitcast i128 %tmp83 to <4 x float> + %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64> + %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1> + %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float> + %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %tmp89 = fmul <4 x float> undef, %tmp88 + %tmp90 = fadd <4 x float> %tmp89, undef + %tmp91 = fadd <4 x float> undef, %tmp90 + store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0 + unreachable + +bb92: ; preds = %bb2 + ret void +} + +declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll new file mode 100644 index 0000000..6c7aaad --- /dev/null +++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-coalescing < %s +; PR11868 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { <4 x float> } +%1 = type { <4 x float> } + +@foo = external global %0, align 16 + +define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind { + %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16 + %5 = extractelement <4 x float> %4, i32 0 + %6 = extractelement <4 x float> %4, i32 1 + %7 = extractelement <4 x float> %4, i32 2 + %8 = insertelement <4 x float> undef, float %5, i32 0 + %9 = insertelement <4 x float> %8, float %6, i32 1 + %10 = insertelement <4 x float> %9, float %7, i32 2 + %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3 + store <4 x float> %11, <4 x float>* undef, align 16 + call arm_aapcs_vfpcc void @baz(%1* undef, float 0.000000e+00) nounwind + ret void +} + +declare arm_aapcs_vfpcc void @baz(%1*, float) diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll index 95edaad..1272e8e 
100644 --- a/test/CodeGen/ARM/arm-returnaddr.ll +++ b/test/CodeGen/ARM/arm-returnaddr.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s -; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s ; rdar://8015977 ; rdar://8020118 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 02ce5a1..8967730 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -61,7 +61,7 @@ entry: ; CHECK: strex %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old - %neg = sub i32 0, 1 ; <i32> [#uses=1] + %neg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -77,5 +77,85 @@ entry: ; CHECK: strex %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old - ret void + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %11 = atomicrmw umin i32* %val2, i32 16 monotonic + store i32 %11, i32* %old + %uneg = sub i32 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic + store i32 %12, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %13 = atomicrmw umax i32* %val2, i32 1 monotonic + store i32 %13, i32* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %14 = atomicrmw umax i32* %val2, i32 0 monotonic + store i32 %14, i32* %old + + ret void +} + +define void @func2() nounwind { +entry: + %val = alloca i16 + %old = alloca i16 + store i16 31, i16* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i16* %val, i16 16 monotonic + store i16 %0, i16* %old + %uneg = sub i16 0, 1 + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %1 = atomicrmw umin i16* %val, i16 %uneg monotonic + store i16 %1, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i16* %val, i16 1 monotonic + store i16 %2, i16* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i16* %val, i16 0 monotonic + store i16 %3, i16* %old + ret void +} + +define void @func3() nounwind { +entry: + %val = alloca i8 + %old = alloca i8 + store i8 31, i8* %val + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %0 = atomicrmw umin i8* %val, i8 16 monotonic + store i8 %0, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %uneg = sub i8 0, 1 + %1 = atomicrmw umin i8* %val, i8 %uneg monotonic + store i8 %1, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %2 = atomicrmw umax i8* %val, i8 1 monotonic + store i8 %2, i8* %old + ; CHECK: ldrex + ; CHECK: cmp + ; CHECK: strex + %3 = atomicrmw umax i8* %val, i8 0 monotonic + store i8 %3, i8* %old + ret void } diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 877ec18..1b385ab 100644 --- 
a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -6,9 +6,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { entry: ; CHECK: t1: -; CHECK: muls [[REG:(r[0-9]+)]], r2, r3 -; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1 -; CHECK-NEXT: muls r0, [[REG2]], [[REG]] +; CHECK: muls [[REG:(r[0-9]+)]], r3, r2 +; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0 +; CHECK-NEXT: muls r0, [[REG]], [[REG2]] %0 = mul nsw i32 %a, %b %1 = mul nsw i32 %c, %d %2 = mul nsw i32 %0, %1 diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index f78d998..be3e105 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 +; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D ; Enable tailcall optimization for iOS 5.0 diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index 91ef659..487ec69 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -12,9 +12,9 @@ entry: br i1 %0, label %bb2, label %bb bb: -; CHECK: LBB0_2: -; CHECK: bne LBB0_2 -; CHECK-NOT: b LBB0_2 +; CHECK: LBB0_1: +; CHECK: bne LBB0_1 +; CHECK-NOT: b LBB0_1 ; CHECK: bx lr %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll new file mode 100644 index 0000000..eff5de5 --- /dev/null +++ b/test/CodeGen/ARM/cse-call.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "armv6-apple-ios0.0.0" + +; Don't CSE a cmp across a call that clobbers CPSR. +; +; CHECK: cmp +; CHECK: S_trimzeros +; CHECK: cmp +; CHECK: strlen + +@F_floatmul.man1 = external global [200 x i8], align 1 +@F_floatmul.man2 = external global [200 x i8], align 1 + +declare i32 @strlen(i8* nocapture) nounwind readonly +declare void @S_trimzeros(...) + +define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp { +entry: + br i1 undef, label %while.end42, label %while.body37 + +while.body37: ; preds = %while.body37, %entry + br i1 false, label %while.end42, label %while.body37 + +while.end42: ; preds = %while.body37, %entry + %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0) + %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0) + tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind + %call47 = tail call i32 @strlen(i8* %.) 
nounwind + unreachable +} diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll index 0dcf9dd..1d011be 100644 --- a/test/CodeGen/ARM/cse-libcalls.ll +++ b/test/CodeGen/ARM/cse-libcalls.ll @@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8" ; Without CSE of libcalls, there are two calls in the output instead of one. -define i32 @u_f_nonbon(double %lambda) nounwind { +define double @u_f_nonbon(double %lambda) nounwind { entry: %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2] %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1] @@ -26,5 +26,5 @@ bb502.loopexit.i: ; preds = %bb28.i br i1 false, label %bb.nph53.i, label %bb508.i bb508.i: ; preds = %bb502.loopexit.i, %entry - ret i32 1 + ret double %tmp10.i4 } diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll index 7f00eb3..6419292 100644 --- a/test/CodeGen/ARM/ctor_order.ll +++ b/test/CodeGen/ARM/ctor_order.ll @@ -6,13 +6,15 @@ ; DARWIN: .long _f151 ; DARWIN-NEXT: .long _f152 -; ELF: .section .ctors,"aw",%progbits +; ELF: .section .ctors.65384,"aw",%progbits +; ELF: .long f151 +; ELF: .section .ctors.65383,"aw",%progbits ; ELF: .long f152 -; ELF-NEXT: .long f151 -; GNUEABI: .section .init_array,"aw",%init_array +; GNUEABI: .section .init_array.151,"aw",%init_array ; GNUEABI: .long f151 -; GNUEABI-NEXT: .long f152 +; GNUEABI: .section .init_array.152,"aw",%init_array +; GNUEABI: .long f152 @llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ] diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index b0270f9..a7b44e6 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -2,7 +2,7 @@ ; Test to check argument y's debug info uses FI ; Radar 10048772 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.tag_s = type { i32, i32, i32 } diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 00e6cb0..0ad0a15 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -2,7 +2,7 @@ ; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0 ; Radar 9331779 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %0 = type opaque %1 = type { [4 x i32] } diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index 3972e68..ae7af0a 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: Ldebug_loc0: +;CHECK-NEXT: .long Ltmp0 ;CHECK-NEXT: .long Ltmp1 -;CHECK-NEXT: .long Ltmp2 -;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp10-Ltmp9 @ Loc expr size +;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size ;CHECK-NEXT: .short Lset[[N]] -;CHECK-NEXT: Ltmp9: +;CHECK-NEXT: Ltmp[[M]]: ;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register define void @_Z3foov() optsize ssp { diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp deleted file mode 100644 index 3ff359a..0000000 --- 
a/test/CodeGen/ARM/dg.exp +++ /dev/null @@ -1,5 +0,0 @@ -load_lib llvm.exp - -if { [llvm_supports_target ARM] } { - RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] -} diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll new file mode 100644 index 0000000..fd7d0e6 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-unwind.ll @@ -0,0 +1,16 @@ +; Test that the EHABI unwind instruction generator does not encounter any +; unfamiliar instructions. +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi +; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors + +define void @_Z1fv() nounwind { +entry: + ret void +} + +define void @_Z1gv() nounwind { +entry: + call void @_Z1fv() + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll new file mode 100644 index 0000000..723383e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-binary.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Test add with non-legal types + +define void @add_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: add_i1 +; THUMB: add_i1 + %a.addr = alloca i1, align 4 + %0 = add i1 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @add_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: add_i8 +; THUMB: add_i8 + %a.addr = alloca i8, align 4 + %0 = add i8 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @add_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: add_i16 +; THUMB: add_i16 + %a.addr = alloca i16, align 4 + %0 = add i16 %a, %b +; ARM: add r0, r0, r1 +; THUMB: add r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test or with non-legal types + +define void @or_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: or_i1 +; THUMB: or_i1 + %a.addr = alloca i1, align 4 + %0 = or i1 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @or_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: or_i8 +; THUMB: or_i8 + %a.addr = alloca i8, align 4 + %0 = or i8 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @or_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: or_i16 +; THUMB: or_i16 + %a.addr = alloca i16, align 4 + %0 = or i16 %a, %b +; ARM: orr r0, r0, r1 +; THUMB: orrs r0, r1 + store i16 %0, i16* %a.addr, align 4 + ret void +} + +; Test sub with non-legal types + +define void @sub_i1(i1 %a, i1 %b) nounwind ssp { +entry: +; ARM: sub_i1 +; THUMB: sub_i1 + %a.addr = alloca i1, align 4 + %0 = sub i1 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i1 %0, i1* %a.addr, align 4 + ret void +} + +define void @sub_i8(i8 %a, i8 %b) nounwind ssp { +entry: +; ARM: sub_i8 +; THUMB: sub_i8 + %a.addr = alloca i8, align 4 + %0 = sub i8 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i8 %0, i8* %a.addr, align 4 + ret void +} + +define void @sub_i16(i16 %a, i16 %b) nounwind ssp { +entry: +; ARM: sub_i16 +; THUMB: sub_i16 + %a.addr = alloca i16, align 4 + %0 = sub i16 %a, %b +; ARM: sub r0, r0, r1 +; THUMB: subs r0, r0, r1 + store i16 %0, i16* %a.addr, 
align 4 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll index b7acfaa..625adc2 100644 --- a/test/CodeGen/ARM/fast-isel-br-const.ll +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll new file mode 100644 index 0000000..a0aba69 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-br-phi.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios + +; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic +; non-legal integer types (i.e., i1, i8, i16). + +declare void @fooi8(i8) +declare void @fooi16(i16) + +define void @foo(i1 %cmp) nounwind ssp { +entry: + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ] + br i1 %cond, label %cond.true8, label %cond.false8 + +cond.true8: ; preds = %cond.end + br label %cond.end8 + +cond.false8: ; preds = %cond.end + br label %cond.end8 + +cond.end8: ; preds = %cond.false8, %cond.true8 + %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ] + call void @fooi8(i8 %cond8) + br i1 0, label %cond.true16, label %cond.false16 + +cond.true16: ; preds = %cond.end8 + br label %cond.end16 + +cond.false16: ; preds = %cond.end8 + br label %cond.end16 + +cond.end16: ; preds = %cond.false16, %cond.true16 + %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ] + call void @fooi16(i16 %cond16) + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index 695dbba..dd460b2 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index 33c6008..1693066 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin 
| FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define void @t1a(float %a) uwtable ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll index 14666a8..686ccad 100644 --- a/test/CodeGen/ARM/fast-isel-conversion.ll +++ b/test/CodeGen/ARM/fast-isel-conversion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test sitofp @@ -94,3 +94,149 @@ entry: store double %conv, double* %b.addr, align 8 ret void } + +; Test uitofp + +define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { +entry: +; ARM: uitofp_single_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i32 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { +entry: +; ARM: uitofp_single_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i16 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_single_i8(i8 %a) nounwind ssp { +entry: +; ARM: uitofp_single_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f32.u32 s0, s0 +; THUMB: uitofp_single_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f32.u32 s0, s0 + %b.addr = alloca float, align 4 + %conv = uitofp i8 %a to float + store float %conv, float* %b.addr, align 4 + ret void +} + +define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i32 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i32 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i32 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i16 +; ARM: uxth r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i16 +; THUMB: uxth r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i16 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { +entry: +; ARM: uitofp_double_i8 +; ARM: uxtb r0, r0 +; ARM: vmov s0, r0 +; ARM: vcvt.f64.u32 d16, s0 +; THUMB: uitofp_double_i8 +; THUMB: uxtb r0, r0 +; THUMB: vmov s0, r0 +; THUMB: vcvt.f64.u32 d16, s0 + %b.addr = alloca double, align 8 + %conv = uitofp i8 %a to double + store double %conv, double* %b.addr, align 8 + ret void +} + +; Test fptosi + 
+define void @fptosi_float(float %a) nounwind ssp { +entry: +; ARM: fptosi_float +; ARM: vcvt.s32.f32 s0, s0 +; THUMB: fptosi_float +; THUMB: vcvt.s32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptosi float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptosi_double(double %a) nounwind ssp { +entry: +; ARM: fptosi_double +; ARM: vcvt.s32.f64 s0, d16 +; THUMB: fptosi_double +; THUMB: vcvt.s32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptosi double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} + +; Test fptoui + +define void @fptoui_float(float %a) nounwind ssp { +entry: +; ARM: fptoui_float +; ARM: vcvt.u32.f32 s0, s0 +; THUMB: fptoui_float +; THUMB: vcvt.u32.f32 s0, s0 + %b.addr = alloca i32, align 4 + %conv = fptoui float %a to i32 + store i32 %conv, i32* %b.addr, align 4 + ret void +} + +define void @fptoui_double(double %a) nounwind ssp { +entry: +; ARM: fptoui_double +; ARM: vcvt.u32.f64 s0, d16 +; THUMB: fptoui_double +; THUMB: vcvt.u32.f64 s0, d16 + %b.addr = alloca i32, align 8 + %conv = fptoui double %a to i32 + store i32 %conv, i32* %b.addr, align 8 + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll index 028d940..7e147c7 100644 --- a/test/CodeGen/ARM/fast-isel-deadcode.ll +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Target-specific selector can't properly handle the double because it isn't ; being passed via a register, so the materialized arguments become dead code. 
@@ -15,8 +15,7 @@ entry: ; THUMB-NOT: sxtb ; THUMB: movs r0, #0 ; THUMB: movt r0, #0 -; THUMB: add sp, #32 -; THUMb: pop {r7, pc} +; THUMB: pop ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll index deffe7b..8764bef 100644 --- a/test/CodeGen/ARM/fast-isel-icmp.ll +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind { entry: diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll new file mode 100644 index 0000000..be8035e --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define void @t1(i8* %x) { +entry: +; ARM: t1 +; THUMB: t1 + br label %L0 + +L0: + br label %L1 + +L1: + indirectbr i8* %x, [ label %L0, label %L1 ] +; ARM: bx r0 +; THUMB: mov pc, r0 +} diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 3ef8bce..e6bdfa7 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,19 +1,21 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { ; ARM: t1 -; ARM: ldr r0, LCPI0_0 +; ARM: movw r0, :lower16:_message1 +; ARM: movt r0, :upper16:_message1 ; ARM: add r0, r0, #5 ; ARM: movw r1, #64 ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 ; ARM: bl _memset ; THUMB: t1 -; THUMB: ldr.n r0, LCPI0_0 +; THUMB: movw r0, :lower16:_message1 +; THUMB: movt r0, :upper16:_message1 ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 ; THUMB: movt r1, #0 @@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM: t2 -; ARM: ldr r0, LCPI1_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -39,7 +42,8 @@ define void @t2() nounwind ssp { ; ARM: ldr r1, [sp] @ 4-byte Reload ; ARM: bl _memcpy ; THUMB: t2 -; THUMB: ldr.n r0, LCPI1_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr 
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t3() nounwind ssp { ; ARM: t3 -; ARM: ldr r0, LCPI2_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 @@ -63,7 +68,8 @@ define void @t3() nounwind ssp { ; ARM: mov r0, r1 ; ARM: bl _memmove ; THUMB: t3 -; THUMB: ldr.n r0, LCPI2_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 @@ -77,26 +83,24 @@ define void @t3() nounwind ssp { define void @t4() nounwind ssp { ; ARM: t4 -; ARM: ldr r0, LCPI3_0 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr ; ARM: ldr r0, [r0] -; ARM: ldr r1, LCPI3_1 -; ARM: ldr r1, [r1] -; ARM: ldr r2, [r1, #16] -; ARM: str r2, [r0, #4] -; ARM: ldr r2, [r1, #20] -; ARM: str r2, [r0, #8] -; ARM: ldrh r1, [r1, #24] +; ARM: ldr r1, [r0, #16] +; ARM: str r1, [r0, #4] +; ARM: ldr r1, [r0, #20] +; ARM: str r1, [r0, #8] +; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: ldr.n r0, LCPI3_0 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] -; THUMB: ldr.n r1, LCPI3_1 -; THUMB: ldr r1, [r1] -; THUMB: ldr r2, [r1, #16] -; THUMB: str r2, [r0, #4] -; THUMB: ldr r2, [r1, #20] -; THUMB: str r2, [r0, #8] -; THUMB: ldrh r1, [r1, #24] +; THUMB: ldr r1, [r0, #16] +; THUMB: str r1, [r0, #4] +; THUMB: ldr r1, [r0, #20] +; THUMB: str r1, [r0, #8] +; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll index 0b8a768..2a88678 100644 --- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index dcfc9d0..e8cc2b2 100644 --- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; rdar://10418009 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll index daf56e7..b180e43 100644 --- a/test/CodeGen/ARM/fast-isel-mvn.ll +++ b/test/CodeGen/ARM/fast-isel-mvn.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | 
FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; rdar://10412592 ; Note: The Thumb code is being generated by the target-independent selector. @@ -104,4 +104,4 @@ entry: ; THUMB: movt r0, #33023 call void @foo(i32 -2130706433) ret void -}
\ No newline at end of file +} diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll index 4203537..e50c3a4 100644 --- a/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -regalloc=basic < %s +; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. (It was crashing because a register was getting redefined.) diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll index f7f4521..689b169 100644 --- a/test/CodeGen/ARM/fast-isel-ret.ll +++ b/test/CodeGen/ARM/fast-isel-ret.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s ; Sign-extend of i1 currently not supported by fast-isel ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp { @@ -46,3 +46,12 @@ entry: ; CHECK: bx lr ret i16 %a } + +define i16 @ret6(i16 %a) nounwind uwtable ssp { +entry: +; CHECK: ret6 +; CHECK-NOT: uxth +; CHECK-NOT: sxth +; CHECK: bx lr + ret i16 %a +} diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index 9ac63d6..b83a733 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 648d711..905543a 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Very basic fast-isel functionality. 
define i32 @add(i32 %a, i32 %b) nounwind { @@ -142,21 +142,19 @@ define void @test4() { store i32 %b, i32* @test4g ret void -; THUMB: ldr.n r0, LCPI4_1 +; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr +; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr ; THUMB: ldr r0, [r0] -; THUMB: ldr r0, [r0] -; THUMB: adds r0, #1 -; THUMB: ldr.n r1, LCPI4_0 -; THUMB: ldr r1, [r1] -; THUMB: str r0, [r1] +; THUMB: ldr r1, [r0] +; THUMB: adds r1, #1 +; THUMB: str r1, [r0] -; ARM: ldr r0, LCPI4_1 +; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr +; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr ; ARM: ldr r0, [r0] -; ARM: ldr r0, [r0] -; ARM: add r0, r0, #1 -; ARM: ldr r1, LCPI4_0 -; ARM: ldr r1, [r1] -; ARM: str r0, [r1] +; ARM: ldr r1, [r0] +; ARM: add r1, r1, #1 +; ARM: str r1, [r0] } ; Check unaligned stores diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index c4dbeb9..87115cc 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT -; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD +; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT +; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD ; rdar://8984306 define float @test1(float %x, float %y) nounwind { @@ -60,8 +60,8 @@ entry: define float @test5() nounwind { entry: ; SOFT: test5: -; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 ; SOFT: vbsl [[REG6]], [[REG7]], %0 = tail call double (...)* @bar() nounwind diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index ad03202..80925c7 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -1,24 +1,16 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s ; rdar://7461510 +; rdar://10964603 +; Disable this optimization unless we know one of them is zero. 
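; Illustrative sketch of the trick being checked (my summary; %x and the
; value names are invented, not taken from the tests below): a comparison
; against +/-0.0 can be answered in integer registers because +0.0 is bit
; pattern 0x00000000 and -0.0 is 0x80000000, so clearing the sign bit and
; testing for zero is enough:
;   %bits = bitcast float %x to i32
;   %mag  = and i32 %bits, 2147483647   ; drop the sign bit (0x7fffffff)
;   %isz  = icmp eq i32 %mag, 0         ; true iff %x is +0.0 or -0.0
; The bfc/tst sequences checked in t2 and t3 below are the ARM spellings of
; that mask-and-compare.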
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind { entry: -; FINITE: t1: -; FINITE-NOT: vldr -; FINITE: ldr -; FINITE: ldr -; FINITE: cmp r0, r1 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: beq - -; NAN: t1: -; NAN: vldr s0, -; NAN: vldr s1, -; NAN: vcmpe.f32 s1, s0 -; NAN: vmrs apsr_nzcv, fpscr -; NAN: beq +; CHECK: t1: +; CHECK: vldr [[S0:s[0-9]+]], +; CHECK: vldr [[S1:s[0-9]+]], +; CHECK: vcmpe.f32 [[S1]], [[S0]] +; CHECK: vmrs apsr_nzcv, fpscr +; CHECK: beq %0 = load float* %a %1 = load float* %b %2 = fcmp une float %0, %1 @@ -33,17 +25,21 @@ bb2: ret i32 %4 } +; If one side is zero, the other size sign bit is masked off to allow +; +0.0 == -0.0 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { entry: -; FINITE: t2: -; FINITE-NOT: vldr -; FINITE: ldrd r0, r1, [r0] -; FINITE-NOT: b LBB -; FINITE: cmp r0, #0 -; FINITE: cmpeq r1, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t2: +; CHECK-NOT: vldr +; CHECK: ldr [[REG1:(r[0-9]+)]], [r0] +; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4] +; CHECK-NOT: b LBB +; CHECK: cmp [[REG1]], #0 +; CHECK: bfc [[REG2]], #31, #1 +; CHECK: cmpeq [[REG2]], #0 +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load double* %a %1 = fcmp oeq double %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 @@ -59,13 +55,14 @@ bb2: define arm_apcscc i32 @t3(float* %a, float* %b) nounwind { entry: -; FINITE: t3: -; FINITE-NOT: vldr -; FINITE: ldr r0, [r0] -; FINITE: cmp r0, #0 -; FINITE-NOT: vcmpe.f32 -; FINITE-NOT: vmrs -; FINITE: bne +; CHECK: t3: +; CHECK-NOT: vldr +; CHECK: ldr [[REG3:(r[0-9]+)]], [r0] +; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648 +; CHECK: tst [[REG3]], [[REG4]] +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: bne %0 = load float* %a %1 = fcmp oeq float %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll new file mode 100644 index 0000000..40e8bb2 --- /dev/null +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s +; Check generated fused MAC and MLS. 
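; Background note (my wording, not from the patch): a fused MAC computes
; round(a*b + c) with a single rounding step, while separate vmul/vadd round
; twice, so with +vfp4 the fmul+fadd / fmul+fsub pairs below can be selected
; as vfma/vfms and friends.  For comparison only (hypothetical, not used by
; this file), the fused form can also be requested directly through the fma
; intrinsic, which should map to the same instruction:
;   declare float @llvm.fma.f32(float, float, float)
;   %r = call float @llvm.fma.f32(float %a, float %b, float %c)  ; vfma.f32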
+ +define double @fusedMACTest1(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest1: +;CHECK: vfma.f64 + %1 = fmul double %d1, %d2 + %2 = fadd double %1, %d3 + ret double %2 +} + +define float @fusedMACTest2(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest2: +;CHECK: vfma.f32 + %1 = fmul float %f1, %f2 + %2 = fadd float %1, %f3 + ret float %2 +} + +define double @fusedMACTest3(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest3: +;CHECK: vfms.f64 + %1 = fmul double %d2, %d3 + %2 = fsub double %d1, %1 + ret double %2 +} + +define float @fusedMACTest4(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest4: +;CHECK: vfms.f32 + %1 = fmul float %f2, %f3 + %2 = fsub float %f1, %1 + ret float %2 +} + +define double @fusedMACTest5(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest5: +;CHECK: vfnma.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double -0.0, %1 + %3 = fsub double %2, %d3 + ret double %3 +} + +define float @fusedMACTest6(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest6: +;CHECK: vfnma.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float -0.0, %1 + %3 = fsub float %2, %f3 + ret float %3 +} + +define double @fusedMACTest7(double %d1, double %d2, double %d3) { +;CHECK: fusedMACTest7: +;CHECK: vfnms.f64 + %1 = fmul double %d1, %d2 + %2 = fsub double %1, %d3 + ret double %2 +} + +define float @fusedMACTest8(float %f1, float %f2, float %f3) { +;CHECK: fusedMACTest8: +;CHECK: vfnms.f32 + %1 = fmul float %f1, %f2 + %2 = fsub float %1, %f3 + ret float %2 +} + +define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest9: +;CHECK: vfma.f32 + %mul = fmul <2 x float> %a, %b + %add = fadd <2 x float> %mul, %a + ret <2 x float> %add +} + +define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) { +;CHECK: fusedMACTest10: +;CHECK: vfms.f32 + %mul = fmul <2 x float> %a, %b + %sub = fsub <2 x float> %a, %mul + ret <2 x float> %sub +} + +define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest11: +;CHECK: vfma.f32 + %mul = fmul <4 x float> %a, %b + %add = fadd <4 x float> %mul, %a + ret <4 x float> %add +} + +define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) { +;CHECK: fusedMACTest12: +;CHECK: vfms.f32 + %mul = fmul <4 x float> %a, %b + %sub = fsub <4 x float> %a, %mul + ret <4 x float> %sub +} diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index 9f46ae0..893b426 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \ ; RUN: grep mov | count 2 -; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2 +; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2 @str = internal constant [12 x i8] c"Hello World\00" diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index b073a05..cd870bb 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,15 +1,17 @@ -; RUN: llc < %s -march=arm -mattr=+v4t -; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s define i32 @t1(i32 %a, i32 %b) { +; CHECK: t1: %tmp2 = icmp eq i32 %a, 0 br i1 %tmp2, label %cond_false, label %cond_true cond_true: +; CHECK: subeq r0, r1, #1 %tmp5 = add i32 %b, 1 ret i32 %tmp5 cond_false: +; CHECK: addne r0, r1, #1 %tmp7 = add i32 %b, -1 ret i32 %tmp7 } diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll 
index 18f87bf..a5082d8 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s ; rdar://8402126 ; Make sure if-converter is not predicating vldmia and ldmia. These are ; micro-coded and would have long issue latency even if predicated on diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index 3e2c578..eef4de0 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -1,14 +1,19 @@ -; RUN: llc < %s -march=arm -mattr=+v4t +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK: t1: +; CHECK: cmp r2, #1 +; CHECK: cmpne r2, #7 switch i32 %c, label %cond_next [ i32 1, label %cond_true i32 7, label %cond_true ] cond_true: +; CHECK: addne r0 +; CHECK: bxne %tmp12 = add i32 %a, 1 %tmp1518 = add i32 %tmp12, %b ret i32 %tmp1518 diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 3615055..95f5c97 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s @x = external global i32* ; <i32**> [#uses=1] diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 2327657..a00deda 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index 1d32322..d188fae 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+v6 -; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\ ; RUN: grep mov | count 3 define i32 @test(i32 %x) { diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index d72e9bf..a588bc3 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3 ; rdar://6949835 +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY ; Magic ARM pair hints works best with linearscan / fast. @@ -23,3 +25,47 @@ entry: %2 = mul i64 %1, %a ret i64 %2 } + +; rdar://10435045 mixed LDRi8/LDRi12 +; +; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be +; able to generate an LDRD pair here, but this is highly sensitive to +; regalloc hinting. So, this doubles as a register allocation +; test. RABasic currently does a better job within the inner loop +; because of its *lack* of hinting ability. Whereas RAGreedy keeps +; R0/R1/R2 live as the three arguments, forcing the LDRD's odd +; destination into R3. We then sensibly split LDRD again rather then +; evict another live range or use callee saved regs. 
Sorry if the test +; is sensitive to Regalloc changes, but it is an interesting case. +; +; BASIC: @f +; BASIC: %bb +; BASIC: ldrd +; BASIC: str +; GREEDY: @f +; GREEDY: %bb +; GREEDY: ldr +; GREEDY: ldr +; GREEDY: str +define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind { +entry: + %0 = add nsw i32 %n, -1 ; <i32> [#uses=2] + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3] + %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1] + %tmp = add i32 %i.03, 1 ; <i32> [#uses=3] + %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1] + %4 = add nsw i32 %3, %2 ; <i32> [#uses=1] + store i32 %4, i32* %scevgep4, align 4 + %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg new file mode 100644 index 0000000..dd6c50d --- /dev/null +++ b/test/CodeGen/ARM/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll new file mode 100644 index 0000000..bdd4081 --- /dev/null +++ b/test/CodeGen/ARM/load_i1_select.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios0.0.0" + +; Codegen should only compare one bit of the loaded value. +; rdar://10887484 + +; CHECK: foo: +; CHECK: ldrb r[[R0:[0-9]+]], [r0] +; CHECK: tst.w r[[R0]], #1 +define void @foo(i8* %call, double* %p) nounwind { +entry: + %tmp2 = load i8* %call + %tmp3 = trunc i8 %tmp2 to i1 + %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00 + store double %cond, double* %p + ret void +} diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll new file mode 100644 index 0000000..8068abd --- /dev/null +++ b/test/CodeGen/ARM/log2_not_readnone.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s + +; Log2 and exp2 are string-matched to intrinsics. If they are not declared +; readnone, they can't be changed to intrinsics (because they can change errno). 
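; Sketch of the contrast being tested (assumed, not part of this file): if
; the declarations below did carry readnone, e.g.
;   declare double @log2(double) nounwind readnone
; the optimizer would be free to treat such a call like the pure intrinsic
; @llvm.log2.f64 and fold or move it; that must not happen here precisely
; because the plain libm call may set errno.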
+ +declare double @log2(double) +declare double @exp2(double) + +define void @f() { + ; CHECK: bl log2 + %1 = call double @log2(double 0.000000e+00) + ; CHECK: bl exp2 + %2 = call double @exp2(double 0.000000e+00) + ret void +} diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index bf26a96..5b4cf9d 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -12,7 +12,7 @@ ; CHECK: add target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-macosx10.7.0" +target triple = "thumbv7-apple-ios" %struct.partition_entry = type { i32, i32, i64, i64 } diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index c77402f..f566974 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ;rdar://8003725 @G1 = external global i32 @@ -6,6 +6,7 @@ define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { entry: +; CHECK: f1: ; CHECK: cmp ; CHECK: moveq ; CHECK-NOT: cmp @@ -16,3 +17,31 @@ entry: %tmp4 = add i32 %tmp2, %tmp3 ret i32 %tmp4 } + +@foo = external global i32 +@bar = external global [250 x i8], align 1 + +; CSE of cmp across BB boundary +; rdar://10660865 +define void @f2() nounwind ssp { +entry: +; CHECK: f2: +; CHECK: cmp +; CHECK: poplt +; CHECK-NOT: cmp +; CHECK: movle + %0 = load i32* @foo, align 4 + %cmp28 = icmp sgt i32 %0, 0 + br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader + +for.body.lr.ph: ; preds = %entry + %1 = icmp sgt i32 %0, 1 + %smax = select i1 %1, i32 %0, i32 1 + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + unreachable + +for.cond1.preheader: ; preds = %entry + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index aeda022..fe0056c 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s @from = common global [500 x i32] zeroinitializer, align 4 @@ -18,6 +19,8 @@ entry: ; EABI memset swaps arguments ; CHECK: mov r1, #0 ; CHECK: memset + ; DARWIN: movs r1, #0 + ; DARWIN: memset ; EABI: mov r2, #0 ; EABI: __aeabi_memset call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false) diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll new file mode 100644 index 0000000..677b9c2 --- /dev/null +++ b/test/CodeGen/ARM/neon_spill.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -verify-machineinstrs +; PR12177 +; +; This test case spills a QQQQ register. 
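; Background note (my reading; the register-class details are an assumption):
; the four <4 x i32> operands of @llvm.arm.neon.vst4.v4i32 must live in
; consecutive Q registers, so the allocator handles them as one 512-bit QQQQ
; super-register, and under enough pressure that whole tuple is spilled and
; reloaded as a unit.  Shape of the call being exercised:
;   call void @llvm.arm.neon.vst4.v4i32(i8* %p,
;        <4 x i32> %q0, <4 x i32> %q1, <4 x i32> %q2, <4 x i32> %q3, i32 16)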
+; +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target triple = "armv7-none-linux-gnueabi" + +%0 = type { %1*, i32, i32, i32, i8 } +%1 = type { i32 (...)** } +%2 = type { i8*, i8*, i8*, i32 } +%3 = type { %4 } +%4 = type { i32 (...)**, %2, %4*, i8, i8 } + +declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind + +declare arm_aapcs_vfpcc %0** @func2() + +declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32) + +declare arm_aapcs_vfpcc %2** @func4() + +define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + %2 = call arm_aapcs_vfpcc %0** @func2() nounwind + %3 = load %0** %2, align 4, !tbaa !0 + store float 0.000000e+00, float* undef, align 4 + %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind + call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef) + %5 = call arm_aapcs_vfpcc %0** @func2() nounwind + store float 1.000000e+00, float* undef, align 4 + call arm_aapcs_vfpcc void @func1(%0* undef, float* undef, float* undef, %2* undef) + store float 1.500000e+01, float* undef, align 4 + %6 = call arm_aapcs_vfpcc %2** @func4() nounwind + %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2971) nounwind + %8 = fadd float undef, -1.000000e+05 + store float %8, float* undef, align 16, !tbaa !3 + %9 = call arm_aapcs_vfpcc i32 @rand() nounwind + %10 = fmul float undef, 2.000000e+05 + %11 = fadd float %10, -1.000000e+05 + store float %11, float* undef, align 4, !tbaa !3 + call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind + ret void +} + +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind + +declare arm_aapcs_vfpcc i32 @rand() + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"float", metadata !1} diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll new file mode 100644 index 0000000..e28b578 --- /dev/null +++ b/test/CodeGen/ARM/odr_comdat.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI + +; Checking that a comdat group gets generated correctly for a static member +; of instantiated C++ templates. +; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate +; section 5.2.6 Instantiated templates +; "Any static member data object is emitted in a COMDAT identified by its mangled +; name, in any object file with a reference to its name symbol." 
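; For reference, the mangled names demangle as follows (worked out by hand,
; so treat it as a best-effort note rather than part of the test):
;   _ZN1CIiE1iE == C<int>::i   (zero-initialized  -> the .bss comdat below)
;   _ZN1CIiE1jE == C<int>::j   (initialized to 12 -> the .data comdat below)
; i.e. both globals model static data members of a single instantiated
; class template, which is the case the ABI text above is about.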
+ +; Case 1: variable is not explicitly initialized, and ends up in a .bss section +; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat +@_ZN1CIiE1iE = weak_odr global i32 0, align 4 + +; Case 2: variable is explicitly initialized, and ends up in a .data section +; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat +@_ZN1CIiE1jE = weak_odr global i32 12, align 4 diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index ea44c28..6bb6743 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -112,11 +112,11 @@ entry: ret i32 %conv3 } +; rdar://10750814 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone { entry: ; CHECK: test9 -; CHECK: rev r0, r0 -; CHECK: lsr r0, r0, #16 +; CHECK: rev16 r0, r0 %conv = zext i16 %v to i32 %shr4 = lshr i32 %conv, 8 %shl = shl nuw nsw i32 %conv, 8 diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 8a3133a..3a66ec5 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { %s = or i32 %z, %y ret i32 %s } + +define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t5: +; ARM-NOT: moveq +; ARM: orreq r2, r2, #1 + +; T2: t5: +; T2-NOT: moveq +; T2: orreq.w r2, r2, #1 + %tmp1 = icmp eq i32 %a, %b + %tmp2 = zext i1 %tmp1 to i32 + %tmp3 = or i32 %tmp2, %c + ret i32 %tmp3 +} + +define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t6: +; ARM-NOT: movge +; ARM: eorlt r3, r3, r2 + +; T2: t6: +; T2-NOT: movge +; T2: eorlt.w r3, r3, r2 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %c, i32 0 + %tmp2 = xor i32 %tmp1, %d + ret i32 %tmp2 +} + +define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t7: +; ARM-NOT: lsleq +; ARM: andeq r2, r2, r2, lsl #1 + +; T2: t7: +; T2-NOT: lsleq.w +; T2: andeq.w r2, r2, r2, lsl #1 + %tmp1 = shl i32 %c, 1 + %cond = icmp eq i32 %a, %b + %tmp2 = select i1 %cond, i32 %tmp1, i32 -1 + %tmp3 = and i32 %c, %tmp2 + ret i32 %tmp3 +} + diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 964cef0..521ffa1 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -54,12 +54,12 @@ declare i8* @malloc(...) define fastcc void @test4(i16 %addr) nounwind { entry: ; A8: test4: -; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] -; A8: str [[REG]], [r0, r1, lsl #2] +; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! +; A8: str [[REG]], [r0] ; A9: test4: -; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2] -; A9: str [[REG]], [r0, r1, lsl #2] +; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]! 
+; A9: str [[REG]], [r0] %0 = tail call i8* (...)* @malloc(i32 undef) nounwind %1 = bitcast i8* %0 to i32* %2 = sext i16 %addr to i32 diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index bf4e55c..057ea11 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly define void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: -; CHECK: bic sp, sp, #15 +; CHECK: bic {{.*}}, #15 ; CHECK: vst1.64 {{.*}}sp, :128 ; CHECK: vld1.64 {{.*}}sp, :128 entry: diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll new file mode 100644 index 0000000..e015bf0 --- /dev/null +++ b/test/CodeGen/ARM/tail-dup.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s + +; We should be able to tail-duplicate the basic block containing the indirectbr +; into all of its predecessors. +; CHECK: fn: +; CHECK: mov pc +; CHECK: mov pc +; CHECK: mov pc + +@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4 + +define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { +entry: + %0 = load i32* %opcodes, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0 + br label %indirectgoto + +INCREMENT: ; preds = %indirectgoto + %inc = add nsw i32 %result.0, 1 + %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1 + br label %indirectgoto + +DECREMENT: ; preds = %indirectgoto + %dec = add nsw i32 %result.0, -1 + %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2 + br label %indirectgoto + +indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry + %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ] + %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ] + %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ] + %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1 + %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4 + indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT] + +RETURN: ; preds = %indirectgoto + ret i32 %result.0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll new file mode 100644 index 0000000..93340c3 --- /dev/null +++ b/test/CodeGen/ARM/test-sharedidx.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s +; REQUIRES: asserts + +; @sharedidx is an unrolled variant of this loop: +; for (unsigned long i = 0; i < len; i += s) { +; c[i] = a[i] + b[i]; +; } +; where 's' cannot be folded into the addressing mode. +; +; This is not quite profitable to chain. But with -stress-ivchain, we +; can form three address chains in place of the shared induction +; variable. 
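; Rough picture of the chains (my sketch; the value names are invented):
; instead of re-deriving a[i], b[i] and c[i] from one shared induction
; variable each iteration, -stress-ivchain gives every array its own pointer
; that advances by %s, which is exactly what the pre-indexed, write-back
; "ldrb ..., [...]!" forms checked below express:
;   %a.next = getelementptr i8* %a.cur, i32 %s
;   %b.next = getelementptr i8* %b.cur, i32 %s
;   %c.next = getelementptr i8* %c.cur, i32 %s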
+ +; rdar://10674430 +define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { +entry: +; CHECK: sharedidx: + %cmp8 = icmp eq i32 %len, 0 + br i1 %cmp8, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body.3 +; CHECK: %for.body +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8* %a, i32 %i.09 + %0 = load i8* %arrayidx, align 1 + %conv6 = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09 + %1 = load i8* %arrayidx1, align 1 + %conv27 = zext i8 %1 to i32 + %add = add nsw i32 %conv27, %conv6 + %conv3 = trunc i32 %add to i8 + %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09 + store i8 %conv3, i8* %arrayidx4, align 1 + %add5 = add i32 %i.09, %s + %cmp = icmp ult i32 %add5, %len + br i1 %cmp, label %for.body.1, label %for.end + +for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry + ret void + +for.body.1: ; preds = %for.body +; CHECK: %for.body.1 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5 + %2 = load i8* %arrayidx.1, align 1 + %conv6.1 = zext i8 %2 to i32 + %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5 + %3 = load i8* %arrayidx1.1, align 1 + %conv27.1 = zext i8 %3 to i32 + %add.1 = add nsw i32 %conv27.1, %conv6.1 + %conv3.1 = trunc i32 %add.1 to i8 + %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5 + store i8 %conv3.1, i8* %arrayidx4.1, align 1 + %add5.1 = add i32 %add5, %s + %cmp.1 = icmp ult i32 %add5.1, %len + br i1 %cmp.1, label %for.body.2, label %for.end + +for.body.2: ; preds = %for.body.1 +; CHECK: %for.body.2 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! + %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1 + %4 = load i8* %arrayidx.2, align 1 + %conv6.2 = zext i8 %4 to i32 + %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1 + %5 = load i8* %arrayidx1.2, align 1 + %conv27.2 = zext i8 %5 to i32 + %add.2 = add nsw i32 %conv27.2, %conv6.2 + %conv3.2 = trunc i32 %add.2 to i8 + %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1 + store i8 %conv3.2, i8* %arrayidx4.2, align 1 + %add5.2 = add i32 %add5.1, %s + %cmp.2 = icmp ult i32 %add5.2, %len + br i1 %cmp.2, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.body.2 +; CHECK: %for.body.3 +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! +; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2 + %6 = load i8* %arrayidx.3, align 1 + %conv6.3 = zext i8 %6 to i32 + %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2 + %7 = load i8* %arrayidx1.3, align 1 + %conv27.3 = zext i8 %7 to i32 + %add.3 = add nsw i32 %conv27.3, %conv6.3 + %conv3.3 = trunc i32 %add.3 to i8 + %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2 + store i8 %conv3.3, i8* %arrayidx4.3, align 1 + %add5.3 = add i32 %add5.2, %s + %cmp.3 = icmp ult i32 %add5.3, %len + br i1 %cmp.3, label %for.body, label %for.end +} diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index be95657..0c23879 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -381,3 +381,20 @@ entry: store <4 x float> %b, <4 x float> *%p ret void } + +; Vector any_extends must be selected as either vmovl.u or vmovl.s. +; rdar://10723651 +define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp { +entry: +;CHECK: any_extend +;CHECK: vmovl + %and.i186 = zext <4 x i1> %x to <4 x i32> + %add.i185 = sub <4 x i32> %and.i186, %y + %sub.i = sub <4 x i32> %add.i185, zeroinitializer + %add.i = add <4 x i32> %sub.i, zeroinitializer + %vmovn.i = trunc <4 x i32> %add.i to <4 x i16> + tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2) + unreachable +} + +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 915a84b..fb05a20 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ret void } +define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind { +;CHECK: vst2update +;CHECK: vst2.16 {d16, d17}, [r0]! + %tmp1 = load <4 x i16>* %B + tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2) + %t5 = getelementptr inbounds i8* %out, i32 16 + ret i8* %t5 +} + +define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 { +;CHECK: vst2update2 +;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]! + %tmp1 = load <4 x float>* %this + call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind + %tmp2 = getelementptr inbounds i8* %out, i32 32 + ret i8* %tmp2 +} + declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind |