Diffstat (limited to 'test/CodeGen/ARM')
55 files changed, 1358 insertions, 85 deletions
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index 33f935e..a63cdd4 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ -; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer +; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer ; | grep 35 define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) { diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll index b543c57..8d3337c 100644 --- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll +++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep {add.*#0} +; RUN: llc < %s -march=arm | not grep "add.*#0" define i32 @foo() { entry: diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll index d2eb85d..670048b 100644 --- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll +++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep {str.*\\!} +; RUN: llc < %s -march=arm | not grep "str.*\!" %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 } %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll index fd2f462..3754db0 100644 --- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll +++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast -optimize-regalloc=0 ; PR1925 %struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 } diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll index 44da8e7..5fbed0d 100644 --- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll +++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast -optimize-regalloc=0 ; PR1925 %"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" } diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll index 3526722..7342f69 100644 --- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll +++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep {swi 107} +; RUN: llc < %s -march=arm | grep "swi 107" define i32 @_swilseek(i32) nounwind { entry: diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll index 813bf3c..7d4cc6e 100644 --- a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll +++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -regalloc=fast -verify-machineinstrs +; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs target triple = "arm-pc-linux-gnu" ; This test case would accidentally use the same physreg for two virtregs diff --git 
a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll index 5ce600d..b21bb00 100644 --- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios4.0.0" diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index 872eca3..f1c85f1 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -60,8 +60,16 @@ for.end: ; preds = %entry ret void } +; Check that pseudo-expansion preserves <undef> flags. +define void @foo3(i8* %p) nounwind ssp { +entry: + tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) + ret void +} + declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll new file mode 100644 index 0000000..b3a7e34 --- /dev/null +++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll @@ -0,0 +1,71 @@ +; RUN: llc -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 < %s + +; CodeGen SplitCriticalEdge() shouldn't try to break edge to a landing pad. +; rdar://11300144 + +%0 = type opaque +%class.FunctionInterpreter.3.15.31 = type { %class.Parser.1.13.29, %class.Parser.1.13.29*, %struct.ParserVariable.2.14.30*, i32 } +%class.Parser.1.13.29 = type { i32 (...)**, %class.Parser.1.13.29* } +%struct.ParserVariable.2.14.30 = type opaque +%struct.ParseErrorMsg.0.12.28 = type { i32, i32, i32 } + +@_ZTI13ParseErrorMsg = external hidden unnamed_addr constant { i8*, i8* } +@"OBJC_IVAR_$_MUMathExpressionDoubleBased.mInterpreter" = external hidden global i32, section "__DATA, __objc_ivar", align 4 +@"\01L_OBJC_SELECTOR_REFERENCES_14" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" + +declare i8* @objc_msgSend(i8*, i8*, ...) 
+ +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp { +entry: + %call = invoke double undef(%class.FunctionInterpreter.3.15.31* undef) optsize + to label %try.cont unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* bitcast ({ i8*, i8* }* @_ZTI13ParseErrorMsg to i8*) + br i1 undef, label %catch, label %eh.resume + +catch: ; preds = %lpad + invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.ParseErrorMsg.0.12.28*)*)(i8* undef, i8* undef, %struct.ParseErrorMsg.0.12.28* undef) optsize + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %catch + br label %try.cont + +try.cont: ; preds = %invoke.cont2, %entry + %value.0 = phi double [ 0x7FF8000000000000, %invoke.cont2 ], [ %call, %entry ] + ret double %value.0 + +lpad1: ; preds = %catch + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad1, %lpad + resume { i8*, i32 } undef + +terminate.lpad: ; preds = %lpad1 + %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* null + unreachable +} + +declare i32 @__gxx_personality_sj0(...) + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} diff --git a/test/CodeGen/ARM/2012-05-29-TailDupBug.ll b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll new file mode 100644 index 0000000..1a57f04 --- /dev/null +++ b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll @@ -0,0 +1,140 @@ +; RUN: llc -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -verify-machineinstrs < %s + +; Teach taildup to update livein set to appease verifier. 
+; rdar://11538365 + +%struct.__CFString.2 = type opaque + +declare void @CFRelease(i8*) + +define hidden fastcc i32 @t() ssp { +entry: + %mylocale.i.i = alloca [256 x i8], align 1 + br i1 undef, label %return, label %CFStringIsHyphenationAvailableForLocale.exit + +CFStringIsHyphenationAvailableForLocale.exit: ; preds = %entry + br i1 undef, label %return, label %if.end + +if.end: ; preds = %CFStringIsHyphenationAvailableForLocale.exit + br i1 undef, label %if.end8.thread.i, label %if.then.i + +if.then.i: ; preds = %if.end + br i1 undef, label %if.end8.thread.i, label %if.end8.i + +if.end8.thread.i: ; preds = %if.then.i, %if.end + unreachable + +if.end8.i: ; preds = %if.then.i + br i1 undef, label %if.then11.i, label %__CFHyphenationPullTokenizer.exit + +if.then11.i: ; preds = %if.end8.i + unreachable + +__CFHyphenationPullTokenizer.exit: ; preds = %if.end8.i + br i1 undef, label %if.end68, label %if.then3 + +if.then3: ; preds = %__CFHyphenationPullTokenizer.exit + br i1 undef, label %cond.end, label %cond.false + +cond.false: ; preds = %if.then3 + br label %cond.end + +cond.end: ; preds = %cond.false, %if.then3 + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %cond.end + unreachable + +while.end: ; preds = %cond.end + br i1 undef, label %if.end5.i, label %if.then.i16 + +if.then.i16: ; preds = %while.end + br i1 undef, label %if.then4.i, label %if.end5.i + +if.then4.i: ; preds = %if.then.i16 + br i1 false, label %cleanup.thread, label %if.end.i20 + +if.end5.i: ; preds = %if.then.i16, %while.end + unreachable + +if.end.i20: ; preds = %if.then4.i + br label %for.body.i146.i + +for.body.i146.i: ; preds = %for.body.i146.i, %if.end.i20 + br i1 undef, label %if.end20.i, label %for.body.i146.i + +if.end20.i: ; preds = %for.body.i146.i + br i1 undef, label %cleanup.thread, label %if.end23.i + +if.end23.i: ; preds = %if.end20.i + br label %for.body.i94.i + +for.body.i94.i: ; preds = %for.body.i94.i, %if.end23.i + br i1 undef, label %if.then28.i, label %for.body.i94.i + +if.then28.i: ; preds = %for.body.i94.i + br i1 undef, label %cond.true.i26, label %land.lhs.true + +cond.true.i26: ; preds = %if.then28.i + br label %land.lhs.true + +land.lhs.true: ; preds = %cond.true.i26, %if.then28.i + br i1 false, label %cleanup.thread, label %if.end35 + +if.end35: ; preds = %land.lhs.true + br i1 undef, label %cleanup.thread, label %if.end45 + +if.end45: ; preds = %if.end35 + br i1 undef, label %if.then50, label %if.end.i37 + +if.end.i37: ; preds = %if.end45 + br label %if.then50 + +if.then50: ; preds = %if.end.i37, %if.end45 + br i1 undef, label %__CFHyphenationGetHyphensForString.exit, label %if.end.i + +if.end.i: ; preds = %if.then50 + br i1 undef, label %cleanup.i, label %cond.true.i + +cond.true.i: ; preds = %if.end.i + br i1 undef, label %for.cond16.preheader.i, label %for.cond57.preheader.i + +for.cond16.preheader.i: ; preds = %cond.true.i + %cmp1791.i = icmp sgt i32 undef, 1 + br i1 %cmp1791.i, label %for.body18.i, label %for.cond57.preheader.i + +for.cond57.preheader.i: ; preds = %for.cond16.preheader.i, %cond.true.i + %sub69.i = add i32 undef, -2 + br label %cleanup.i + +for.body18.i: ; preds = %for.cond16.preheader.i + store i16 0, i16* undef, align 2 + br label %while.body.i + +while.body.i: ; preds = %while.body.i, %for.body18.i + br label %while.body.i + +cleanup.i: ; preds = %for.cond57.preheader.i, %if.end.i + br label %__CFHyphenationGetHyphensForString.exit + +__CFHyphenationGetHyphensForString.exit: ; preds = %cleanup.i, %if.then50 + %retval.1.i = phi i32 [ 0, 
%cleanup.i ], [ -1, %if.then50 ] + %phitmp = bitcast %struct.__CFString.2* null to i8* + br label %if.end68 + +cleanup.thread: ; preds = %if.end35, %land.lhs.true, %if.end20.i, %if.then4.i + call void @llvm.stackrestore(i8* null) + br label %return + +if.end68: ; preds = %__CFHyphenationGetHyphensForString.exit, %__CFHyphenationPullTokenizer.exit + %hyphenCount.2 = phi i32 [ %retval.1.i, %__CFHyphenationGetHyphensForString.exit ], [ 0, %__CFHyphenationPullTokenizer.exit ] + %_token.1 = phi i8* [ %phitmp, %__CFHyphenationGetHyphensForString.exit ], [ undef, %__CFHyphenationPullTokenizer.exit ] + call void @CFRelease(i8* %_token.1) + br label %return + +return: ; preds = %if.end68, %cleanup.thread, %CFStringIsHyphenationAvailableForLocale.exit, %entry + %retval.1 = phi i32 [ %hyphenCount.2, %if.end68 ], [ -1, %CFStringIsHyphenationAvailableForLocale.exit ], [ -1, %cleanup.thread ], [ -1, %entry ] + ret i32 %retval.1 +} + +declare void @llvm.stackrestore(i8*) nounwind diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll new file mode 100644 index 0000000..b05ec63 --- /dev/null +++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -o /dev/null "-mtriple=thumbv7-apple-ios" -debug-only=post-RA-sched 2> %t +; RUN: FileCheck %s < %t +; REQUIRES: asserts +; Make sure that mayalias store-load dependencies have one cycle +; latency regardless of whether they are barriers or not. + +; CHECK: ** List Scheduling +; CHECK: SU(2){{.*}}STR{{.*}}Volatile +; CHECK-NOT: ch SU +; CHECK: ch SU(3): Latency=1 +; CHECK-NOT: ch SU +; CHECK: SU(3){{.*}}LDR{{.*}}Volatile +; CHECK-NOT: ch SU +; CHECK: ch SU(2): Latency=1 +; CHECK-NOT: ch SU +; CHECK: ** List Scheduling +; CHECK: SU(2){{.*}}STR{{.*}} +; CHECK-NOT: ch SU +; CHECK: ch SU(3): Latency=1 +; CHECK-NOT: ch SU +; CHECK: SU(3){{.*}}LDR{{.*}} +; CHECK-NOT: ch SU +; CHECK: ch SU(2): Latency=1 +; CHECK-NOT: ch SU +define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { +entry: + store volatile i32 65540, i32* %p1, align 4, !tbaa !0 + %0 = load volatile i32* %p2, align 4, !tbaa !0 + ret i32 %0 +} + +define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind { +entry: + store i32 65540, i32* %p1, align 4, !tbaa !0 + %0 = load i32* %p2, align 4, !tbaa !0 + ret i32 %0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll new file mode 100644 index 0000000..b55f1ca --- /dev/null +++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll @@ -0,0 +1,102 @@ +; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s + +@var_v2i8 = global <2 x i8> zeroinitializer +@var_v4i8 = global <4 x i8> zeroinitializer + +@var_v2i16 = global <2 x i16> zeroinitializer +@var_v4i16 = global <4 x i16> zeroinitializer + +@var_v2i32 = global <2 x i32> zeroinitializer +@var_v4i32 = global <4 x i32> zeroinitializer + +@var_v2i64 = global <2 x i64> zeroinitializer + +define void @test_v2i8tov2i32() { +; CHECK: test_v2i8tov2i32: + + %i8val = load <2 x i8>* @var_v2i8 + + %i32val = sext <2 x i8> %i8val to <2 x i32> + store <2 x i32> %i32val, <2 x i32>* @var_v2i32 +; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16] +; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] +; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} + + ret void +} + +define void @test_v2i8tov2i64() { +; CHECK: test_v2i8tov2i64: + + 
%i8val = load <2 x i8>* @var_v2i8 + + %i64val = sext <2 x i8> %i8val to <2 x i64> + store <2 x i64> %i64val, <2 x i64>* @var_v2i64 +; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16] +; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] +; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} +; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}} + +; %i64val = sext <2 x i8> %i8val to <2 x i64> +; store <2 x i64> %i64val, <2 x i64>* @var_v2i64 + + ret void +} + +define void @test_v4i8tov4i16() { +; CHECK: test_v4i8tov4i16: + + %i8val = load <4 x i8>* @var_v4i8 + + %i16val = sext <4 x i8> %i8val to <4 x i16> + store <4 x i16> %i16val, <4 x i16>* @var_v4i16 +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] +; CHECK-NOT: vmovl.s16 + + ret void +; CHECK: bx lr +} + +define void @test_v4i8tov4i32() { +; CHECK: test_v4i8tov4i32: + + %i8val = load <4 x i8>* @var_v4i8 + + %i16val = sext <4 x i8> %i8val to <4 x i32> + store <4 x i32> %i16val, <4 x i32>* @var_v4i32 +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]] +; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}} + + ret void +} + +define void @test_v2i16tov2i32() { +; CHECK: test_v2i16tov2i32: + + %i16val = load <2 x i16>* @var_v2i16 + + %i32val = sext <2 x i16> %i16val to <2 x i32> + store <2 x i32> %i32val, <2 x i32>* @var_v2i32 +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]] +; CHECK-NOT: vmovl + + ret void +; CHECK: bx lr +} + +define void @test_v2i16tov2i64() { +; CHECK: test_v2i16tov2i64: + + %i16val = load <2 x i16>* @var_v2i16 + + %i64val = sext <2 x i16> %i16val to <2 x i64> + store <2 x i64> %i64val, <2 x i64>* @var_v2i64 +; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32] +; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]] +; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]] + + ret void +} diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index 9ccff07..6da9089 100644 --- a/test/CodeGen/ARM/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4 +; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4 define i32 @t1(i32 %a) { %b = mul i32 %a, 9 diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll index 31c5007..d668334 100644 --- a/test/CodeGen/ARM/aliases.ll +++ b/test/CodeGen/ARM/aliases.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t -; RUN: grep { = } %t | count 5 +; RUN: grep " = " %t | count 5 ; RUN: grep globl %t | count 4 ; RUN: grep weak %t | count 1 diff --git a/test/CodeGen/ARM/bicZext.ll b/test/CodeGen/ARM/bicZext.ll new file mode 100644 index 0000000..cf4b7ba --- /dev/null +++ b/test/CodeGen/ARM/bicZext.ll @@ -0,0 +1,19 @@ +; RUN: llc %s -o - | FileCheck %s +; ModuleID = 'bic.c' +target triple = "thumbv7-apple-ios3.0.0" + +define zeroext i16 @foo16(i16 zeroext %f) nounwind readnone optsize ssp { +entry: + ; CHECK: .thumb_func _foo16 + ; CHECK: {{bic[^#]*#3}} + %and = and i16 %f, -4 + ret i16 %and +} + +define i32 @foo32(i32 %f) nounwind readnone optsize ssp { +entry: + ; CHECK: .thumb_func _foo32 + ; CHECK: {{bic[^#]*#3}} + %and = and i32 %f, -4 + ret i32 %and +} diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll index efe29d8..00b16888 100644 --- a/test/CodeGen/ARM/call_nolink.ll +++ b/test/CodeGen/ARM/call_nolink.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: not grep {bx lr} +; RUN: not grep "bx lr" 
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } @r = external global [14 x i32] ; <[14 x i32]*> [#uses=4] diff --git a/test/CodeGen/ARM/cmn.ll b/test/CodeGen/ARM/cmn.ll new file mode 100644 index 0000000..ef73165 --- /dev/null +++ b/test/CodeGen/ARM/cmn.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple thumbv7-apple-ios | FileCheck %s +; <rdar://problem/7569620> + +define i32 @compare_i_gt(i32 %a) { +entry: +; CHECK: compare_i_gt +; CHECK-NOT: mvn +; CHECK: cmn + %cmp = icmp sgt i32 %a, -78 + %. = zext i1 %cmp to i32 + ret i32 %. +} + +define i32 @compare_r_eq(i32 %a, i32 %b) { +entry: +; CHECK: compare_r_eq +; CHECK: cmn + %sub = sub nsw i32 0, %b + %cmp = icmp eq i32 %a, %sub + %. = zext i1 %cmp to i32 + ret i32 %. +} diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll new file mode 100644 index 0000000..fb0f4c6 --- /dev/null +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios0.0.0" + +; CHECK: f +; The vld2 and vst2 are not aligned wrt each other, the second Q loaded is the +; first one stored. +; The coalescer must find a super-register larger than QQ to eliminate the copy +; setting up the vst2 data. +; CHECK: vld2 +; CHECK-NOT: vorr +; CHECK-NOT: vmov +; CHECK: vst2 +define void @f(float* %p, i32 %c) nounwind ssp { +entry: + %0 = bitcast float* %p to i8* + %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4) + %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1 + %add.ptr = getelementptr inbounds float* %p, i32 8 + %1 = bitcast float* %add.ptr to i8* + tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4) + ret void +} + +; CHECK: f1 +; FIXME: This function still has copies. +define void @f1(float* %p, i32 %c) nounwind ssp { +entry: + %0 = bitcast float* %p to i8* + %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4) + %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1 + %add.ptr = getelementptr inbounds float* %p, i32 8 + %1 = bitcast float* %add.ptr to i8* + %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) + %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0 + tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4) + ret void +} + +; CHECK: f2 +; FIXME: This function still has copies. 
+define void @f2(float* %p, i32 %c) nounwind ssp { +entry: + %0 = bitcast float* %p to i8* + %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4) + %vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1 + br label %do.body + +do.body: ; preds = %do.body, %entry + %qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ] + %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ] + %p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ] + %add.ptr = getelementptr inbounds float* %p.addr.0, i32 8 + %1 = bitcast float* %add.ptr to i8* + %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) + %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0 + %vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1 + tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4) + %dec = add nsw i32 %c.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll index 1d011be..62b9e43 100644 --- a/test/CodeGen/ARM/cse-libcalls.ll +++ b/test/CodeGen/ARM/cse-libcalls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1 +; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll new file mode 100644 index 0000000..a66a9d1 --- /dev/null +++ b/test/CodeGen/ARM/data-in-code-annotations.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +define double @f1() nounwind { +; CHECK: f1: +; CHECK: .data_region +; CHECK: .long 1413754129 +; CHECK: .long 1074340347 +; CHECK: .end_data_region + ret double 0x400921FB54442D11 +} + + +define i32 @f2() { +; CHECK: f2: +; CHECK: .data_region jt32 +; CHECK: .end_data_region + +entry: + switch i32 undef, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb6 + i32 3, label %sw.bb13 + i32 4, label %sw.bb20 + ] + +sw.bb: ; preds = %entry + br label %return + +sw.bb6: ; preds = %entry + br label %return + +sw.bb13: ; preds = %entry + br label %return + +sw.bb20: ; preds = %entry + %div = sdiv i32 undef, undef + br label %return + +return: ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry + %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll index 49c4103..7fbf8f4 100644 --- a/test/CodeGen/ARM/divmod.ll +++ b/test/CodeGen/ARM/divmod.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { entry: @@ -56,3 +56,17 @@ bb1: declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind + +; rdar://11714607 +define i32 @howmany(i32 %x, i32 %y) nounwind { +entry: +; CHECK: howmany: +; CHECK: bl ___udivmodsi4 +; 
CHECK-NOT: ___udivsi3 + %rem = urem i32 %x, %y + %div = udiv i32 %x, %y + %not.cmp = icmp ne i32 %rem, 0 + %add = zext i1 %not.cmp to i32 + %cond = add i32 %add, %div + ret i32 %cond +} diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll new file mode 100644 index 0000000..14721a4 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +; Fast-isel can't handle non-double multi-reg retvals. +; This test just check to make sure we don't hit the assert in FinishCall. +define <16 x i8> @foo() nounwind ssp { +entry: + ret <16 x i8> zeroinitializer +} + +define void @t1() nounwind ssp { +entry: +; ARM: @t1 +; THUMB: @t1 + %call = call <16 x i8> @foo() + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index dd460b2..edc805a 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 @@ -99,6 +101,11 @@ entry: ; ARM: uxtb r9, r12 ; ARM: str r9, [sp, #4] ; ARM: bl _bar +; ARM-LONG: @t10 +; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr +; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr +; ARM-LONG: ldr lr, [lr] +; ARM-LONG: blx lr ; THUMB: @t10 ; THUMB: movs r0, #0 ; THUMB: movt r0, #0 @@ -121,8 +128,96 @@ entry: ; THUMB: uxtb.w r9, r12 ; THUMB: str.w r9, [sp, #4] ; THUMB: bl _bar +; THUMB-LONG: @t10 +; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr +; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr +; THUMB-LONG: ldr.w lr, [lr] +; THUMB-LONG: blx lr %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 } declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext) + +define i32 @bar0(i32 %i) nounwind { + ret i32 0 +} + +define void @foo3() uwtable { +; ARM: movw r0, #0 +; ARM: movw r1, :lower16:_bar0 +; ARM: movt r1, :upper16:_bar0 +; ARM: blx r1 +; THUMB: movs r0, #0 +; THUMB: movw r1, :lower16:_bar0 +; THUMB: movt r1, :upper16:_bar0 +; THUMB: blx r1 + %fptr = alloca i32 (i32)*, align 8 + store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 + %1 = load i32 (i32)** %fptr, align 8 + %call = call i32 %1(i32 0) + ret void +} + +define i32 @LibCall(i32 %a, i32 %b) { +entry: +; ARM: LibCall +; ARM: bl ___udivsi3 +; ARM-LONG: LibCall +; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr +; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; ARM-LONG: ldr r2, [r2] +; ARM-LONG: blx r2 +; THUMB: LibCall +; THUMB: bl ___udivsi3 +; THUMB-LONG: LibCall +; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr +; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; 
THUMB-LONG: ldr r2, [r2] +; THUMB-LONG: blx r2 + %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] + ret i32 %tmp1 +} + +define i32 @VarArg() nounwind { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %m = alloca i32, align 4 + %n = alloca i32, align 4 + %tmp = alloca i32, align 4 + %0 = load i32* %i, align 4 + %1 = load i32* %j, align 4 + %2 = load i32* %k, align 4 + %3 = load i32* %m, align 4 + %4 = load i32* %n, align 4 +; ARM: VarArg +; ARM: mov r7, sp +; ARM: movw r0, #5 +; ARM: ldr r1, [r7, #-4] +; ARM: ldr r2, [r7, #-8] +; ARM: ldr r3, [r7, #-12] +; ARM: ldr r9, [sp, #16] +; ARM: ldr r12, [sp, #12] +; ARM: str r9, [sp] +; ARM: str r12, [sp, #4] +; ARM: bl _CallVariadic +; THUMB: mov r7, sp +; THUMB: movs r0, #5 +; THUMB: movt r0, #0 +; THUMB: ldr r1, [sp, #28] +; THUMB: ldr r2, [sp, #24] +; THUMB: ldr r3, [sp, #20] +; THUMB: ldr.w r9, [sp, #16] +; THUMB: ldr.w r12, [sp, #12] +; THUMB: str.w r9, [sp] +; THUMB: str.w r12, [sp, #4] +; THUMB: bl _CallVariadic + %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) + store i32 %call, i32* %tmp, align 4 + %5 = load i32* %tmp, align 4 + ret i32 %5 +} + +declare i32 @CallVariadic(i32, ...) diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll new file mode 100644 index 0000000..8f7b294 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2 +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2 + +define i8* @frameaddr_index0() nounwind { +entry: +; DARWIN-ARM: frameaddr_index0: +; DARWIN-ARM: push {r7} +; DARWIN-ARM: mov r7, sp +; DARWIN-ARM: mov r0, r7 + +; DARWIN-THUMB2: frameaddr_index0: +; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: mov r7, sp +; DARWIN-THUMB2: mov r0, r7 + +; LINUX-ARM: frameaddr_index0: +; LINUX-ARM: push {r11} +; LINUX-ARM: mov r11, sp +; LINUX-ARM: mov r0, r11 + +; LINUX-THUMB2: frameaddr_index0: +; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: mov r7, sp +; LINUX-THUMB2: mov r0, r7 + + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +define i8* @frameaddr_index1() nounwind { +entry: +; DARWIN-ARM: frameaddr_index1: +; DARWIN-ARM: push {r7} +; DARWIN-ARM: mov r7, sp +; DARWIN-ARM: mov r0, r7 +; DARWIN-ARM: ldr r0, [r0] + +; DARWIN-THUMB2: frameaddr_index1: +; DARWIN-THUMB2: str r7, [sp, #-4]! +; DARWIN-THUMB2: mov r7, sp +; DARWIN-THUMB2: mov r0, r7 +; DARWIN-THUMB2: ldr r0, [r0] + +; LINUX-ARM: frameaddr_index1: +; LINUX-ARM: push {r11} +; LINUX-ARM: mov r11, sp +; LINUX-ARM: mov r0, r11 +; LINUX-ARM: ldr r0, [r0] + +; LINUX-THUMB2: frameaddr_index1: +; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: mov r7, sp +; LINUX-THUMB2: mov r0, r7 +; LINUX-THUMB2: ldr r0, [r0] + + %0 = call i8* @llvm.frameaddress(i32 1) + ret i8* %0 +} + +define i8* @frameaddr_index3() nounwind { +entry: +; DARWIN-ARM: frameaddr_index3: +; DARWIN-ARM: push {r7} +; DARWIN-ARM: mov r7, sp +; DARWIN-ARM: mov r0, r7 +; DARWIN-ARM: ldr r0, [r0] +; DARWIN-ARM: ldr r0, [r0] +; DARWIN-ARM: ldr r0, [r0] + +; DARWIN-THUMB2: frameaddr_index3: +; DARWIN-THUMB2: str r7, [sp, #-4]! 
+; DARWIN-THUMB2: mov r7, sp +; DARWIN-THUMB2: mov r0, r7 +; DARWIN-THUMB2: ldr r0, [r0] +; DARWIN-THUMB2: ldr r0, [r0] +; DARWIN-THUMB2: ldr r0, [r0] + +; LINUX-ARM: frameaddr_index3: +; LINUX-ARM: push {r11} +; LINUX-ARM: mov r11, sp +; LINUX-ARM: mov r0, r11 +; LINUX-ARM: ldr r0, [r0] +; LINUX-ARM: ldr r0, [r0] +; LINUX-ARM: ldr r0, [r0] + +; LINUX-THUMB2: frameaddr_index3: +; LINUX-THUMB2: str r7, [sp, #-4]! +; LINUX-THUMB2: mov r7, sp +; LINUX-THUMB2: mov r0, r7 +; LINUX-THUMB2: ldr r0, [r0] +; LINUX-THUMB2: ldr r0, [r0] +; LINUX-THUMB2: ldr r0, [r0] + + %0 = call i8* @llvm.frameaddress(i32 3) + ret i8* %0 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index e6bdfa7..b73fcef 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 @@ -13,6 +15,11 @@ define void @t1() nounwind ssp { ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 ; ARM: bl _memset +; ARM-LONG: t1 +; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr +; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t1 ; THUMB: movw r0, :lower16:_message1 ; THUMB: movt r0, :upper16:_message1 @@ -23,6 +30,11 @@ define void @t1() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: uxtb r1, r1 ; THUMB: bl _memset +; THUMB-LONG: t1 +; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) ret void } @@ -41,6 +53,11 @@ define void @t2() nounwind ssp { ; ARM: mov r0, r1 ; ARM: ldr r1, [sp] @ 4-byte Reload ; ARM: bl _memcpy +; ARM-LONG: t2 +; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr +; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t2 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr @@ -51,6 +68,11 @@ define void @t2() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: mov r0, r1 ; THUMB: bl _memcpy +; THUMB-LONG: t2 +; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false) ret void } @@ -67,6 +89,11 @@ define void @t3() nounwind ssp { ; ARM: movw r2, #10 ; ARM: mov r0, r1 ; ARM: bl _memmove +; ARM-LONG: t3 +; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr 
+; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t3 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr @@ -77,6 +104,11 @@ define void @t3() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: mov r0, r1 ; THUMB: bl _memmove +; THUMB-LONG: t3 +; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) ret void } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 417e2d9..ecd5fe2 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -226,3 +226,15 @@ define i32 @urem_fold(i32 %a) nounwind { %rem = urem i32 %a, 32 ret i32 %rem } + +define i32 @test7() noreturn nounwind { +entry: +; ARM: @test7 +; THUMB: @test7 +; ARM: trap +; THUMB: trap + tail call void @llvm.trap( ) + unreachable +} + +declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index bc118b8..3c3182b 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -21,3 +21,12 @@ entry: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: ; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} + +; VFP2: test2 +define float @test2(float %a) nounwind { +; CHECK-NOT: mul +; CHECK: mov pc, lr + %ret = fmul float %a, 1.0 + ret float %ret +} + diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll index 802d1b8..303d165 100644 --- a/test/CodeGen/ARM/fusedMAC.ll +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s +; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s ; Check generated fused MAC and MLS. 
define double @fusedMACTest1(double %d1, double %d2, double %d3) { @@ -138,8 +138,16 @@ entry: ; CHECK: vfms.f64 %tmp1 = fsub double -0.0, %b %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone - %tmp3 = fsub double -0.0, %tmp2 - ret double %tmp3 + ret double %tmp2 +} + +define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp { +; CHECK: test_fnms_f32 +; CHECK: vfnms.f32 + %tmp1 = load float* %c, align 4 + %tmp2 = fsub float -0.0, %tmp1 + %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone + ret float %tmp3 } define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp { @@ -158,7 +166,8 @@ entry: ; CHECK: vfnms.f64 %tmp1 = fsub double -0.0, %b %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone - ret double %tmp2 + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 } define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp { @@ -180,6 +189,36 @@ entry: ret double %tmp3 } +define float @test_fma_const_fold(float %a, float %b) nounwind { +; CHECK: test_fma_const_fold +; CHECK-NOT: vfma +; CHECK-NOT: vmul +; CHECK: vadd + %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b) + ret float %ret +} + +define float @test_fma_canonicalize(float %a, float %b) nounwind { +; CHECK: test_fma_canonicalize +; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00 +; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]] + %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b) + ret float %ret +} + +; Check that very wide vector fma's can be split into legal fma's. +define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp { +; CHECK: test_fma_v8f32 +; CHECK: vfma.f32 +; CHECK: vfma.f32 +entry: + %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone + store <8 x float> %call, <8 x float>* %p, align 16 + ret void +} + + declare float @llvm.fma.f32(float, float, float) nounwind readnone declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll index 89e309d..600a8c2 100644 --- a/test/CodeGen/ARM/iabs.ll +++ b/test/CodeGen/ARM/iabs.ll @@ -10,7 +10,25 @@ define i32 @test(i32 %a) { %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg ret i32 %abs -; CHECK: movs r0, r0 +; CHECK: cmp ; CHECK: rsbmi r0, r0, #0 ; CHECK: bx lr } + +; rdar://11633193 +;; 3 instructions will be generated for abs(a-b): +;; subs +;; rsbmi +;; bx +define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK: test2 +; CHECK: subs +; CHECK-NEXT: rsbmi +; CHECK-NEXT: bx + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, -1 + %sub1 = sub nsw i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 3f8fd75..73b546d 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3 +; RUN: llc < %s -mtriple=thumbv7-apple-ios 
-mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 ; rdar://6949835 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY @@ -18,7 +18,6 @@ entry: ; M3: t: ; M3-NOT: ldrd -; M3: ldm.w r2, {r2, r3} %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll index 8130019..0c8d387 100644 --- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]} +; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]" ; Should use scaled addressing mode. define void @sintzero(i32* %a) nounwind { diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll index 991d728..bbedea1 100644 --- a/test/CodeGen/ARM/movt-movw-global.ll +++ b/test/CodeGen/ARM/movt-movw-global.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI -; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS -; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC -; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC +; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS +; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC +; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC @foo = common global i32 0 diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll index de48fee..4a82c36 100644 --- a/test/CodeGen/ARM/neon_div.ll +++ b/test/CodeGen/ARM/neon_div.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 -;CHECK: vrecpe.f32 ;CHECK: vmovn.i32 +;CHECK: vrecpe.f32 ;CHECK: vmovn.i32 ;CHECK: vmovn.i16 %tmp1 = load <8 x i8>* %A @@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 ;CHECK: vrecps.f32 +;CHECK: vmovn.i32 ;CHECK: vrecpe.f32 ;CHECK: vrecps.f32 ;CHECK: vmovn.i32 -;CHECK: vmovn.i32 ;CHECK: vqmovun.s16 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll index b4da552..df98e23 100644 --- a/test/CodeGen/ARM/opt-shuff-tstore.ll +++ b/test/CodeGen/ARM/opt-shuff-tstore.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s +; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -mattr=+neon < %s | FileCheck %s ; CHECK: func_4_8 ; CHECK: vst1.32 diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll 
new file mode 100644 index 0000000..4bc8810 --- /dev/null +++ b/test/CodeGen/ARM/pr13249.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple armv7--linux-gnueabi + +define arm_aapcscc i8* @__strtok_r_1c(i8* %arg, i8 signext %arg1, i8** nocapture %arg2) nounwind { +bb: + br label %bb3 + +bb3: ; preds = %bb3, %bb + %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ] + %tmp4 = load i8* %tmp, align 1 + %tmp5 = getelementptr inbounds i8* %tmp, i32 1 + br i1 undef, label %bb3, label %bb7 + +bb7: ; preds = %bb13, %bb3 + %tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ] + %tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ] + %tmp10 = icmp ne i8 %tmp8, %arg1 + %tmp12 = getelementptr inbounds i8* %tmp9, i32 1 + br i1 %tmp10, label %bb13, label %bb15 + +bb13: ; preds = %bb7 + %tmp14 = load i8* %tmp12, align 1 + br label %bb7 + +bb15: ; preds = %bb7 + store i8* %tmp9, i8** %arg2, align 4 + ret i8* %tmp +} diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 3e07da8..418d4f3 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -113,3 +113,29 @@ entry: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize ret void } + +; CHECK: f10 +define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp { +; CHECK-NOT: floatsisf + %1 = icmp eq i32 %a, %b + %2 = zext i1 %1 to i32 + %3 = sitofp i32 %2 to float + ret float %3 +} + +; CHECK: f11 +define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp { +; CHECK-NOT: floatsisf + %1 = icmp eq i32 %a, %b + %2 = sitofp i1 %1 to float + ret float %2 +} + +; CHECK: f12 +define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp { +; CHECK-NOT: floatunsisf + %1 = icmp eq i32 %a, %b + %2 = uitofp i1 %1 to float + ret float %2 +} + diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll new file mode 100644 index 0000000..99df0d4 --- /dev/null +++ b/test/CodeGen/ARM/smml.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp { +entry: +; CHECK-NOT: smmls + %conv4 = zext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %conv2 = sext i32 %c to i64 + %mul = mul nsw i64 %conv2, %conv1 + %shr5 = lshr i64 %mul, 32 + %sub = sub nsw i64 %conv4, %shr5 + %conv3 = trunc i64 %sub to i32 + ret i32 %conv3 +} diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index 983ba45..5ce2bce 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,13 +1,12 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s - -; The greedy register allocator uses a single CSR here, invalidating the test. +; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s @b = external global i64* define i64 @t(i64 %a) nounwind readonly { entry: -; CHECK: push {lr} -; CHECK: pop {lr} +; CHECK: push {r4, r5, lr} +; CHECK: pop {r4, r5, pc} + call void asm sideeffect "", "~{r4},~{r5}"() nounwind %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 %2 = mul i64 %1, %a diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll index e56e3f2..d8b3f0e 100644 --- a/test/CodeGen/ARM/str_pre.ll +++ b/test/CodeGen/ARM/str_pre.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm | \ -; RUN: grep {str.*\\!} | count 2 +; RUN: grep "str.*\!" 
| count 2 define void @test1(i32* %X, i32* %A, i32** %dest) { %B = load i32* %A ; <i32> [#uses=1] diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll new file mode 100644 index 0000000..99ba475 --- /dev/null +++ b/test/CodeGen/ARM/struct_byval.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s + +; rdar://9877866 +%struct.SmallStruct = type { i32, [8 x i32], [37 x i8] } +%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } + +define i32 @f() nounwind ssp { +entry: +; CHECK: f: +; CHECK: ldr +; CHECK: str +; CHECK-NOT:bne + %st = alloca %struct.SmallStruct, align 4 + %call = call i32 @e1(%struct.SmallStruct* byval %st) + ret i32 0 +} + +; Generate a loop for large struct byval +define i32 @g() nounwind ssp { +entry: +; CHECK: g: +; CHECK: ldr +; CHECK: sub +; CHECK: str +; CHECK: bne + %st = alloca %struct.LargeStruct, align 4 + %call = call i32 @e2(%struct.LargeStruct* byval %st) + ret i32 0 +} + +; Generate a loop using NEON instructions +define i32 @h() nounwind ssp { +entry: +; CHECK: h: +; CHECK: vld1 +; CHECK: sub +; CHECK: vst1 +; CHECK: bne + %st = alloca %struct.LargeStruct, align 16 + %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st) + ret i32 0 +} + +declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind +declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind +declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll new file mode 100644 index 0000000..6fcbdee --- /dev/null +++ b/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: f: +; CHECK: subs +; CHECK-NOT: cmp + %cmp = icmp sgt i32 %a, %b + %sub = sub nsw i32 %a, %b + %sub. = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sub. +} + +define i32 @g(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: g: +; CHECK: subs +; CHECK-NOT: cmp + %cmp = icmp slt i32 %a, %b + %sub = sub nsw i32 %b, %a + %sub. = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sub. +} + +define i32 @h(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: h: +; CHECK: subs +; CHECK-NOT: cmp + %cmp = icmp sgt i32 %a, 3 + %sub = sub nsw i32 %a, 3 + %sub. = select i1 %cmp, i32 %sub, i32 %b + ret i32 %sub. +} + +; rdar://11725965 +define i32 @i(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK: i: +; CHECK: subs +; CHECK-NOT: cmp + %cmp = icmp ult i32 %a, %b + %sub = sub i32 %b, %a + %sub. = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sub. +} +; If CPSR is live-out, we can't remove cmp if there exists +; a swapped sub. 
+define i32 @j(i32 %a, i32 %b) nounwind { +entry: +; CHECK: j: +; CHECK: sub +; CHECK: cmp + %cmp = icmp eq i32 %b, %a + %sub = sub nsw i32 %a, %b + br i1 %cmp, label %if.then, label %if.else + +if.then: + %cmp2 = icmp sgt i32 %b, %a + %sel = select i1 %cmp2, i32 %sub, i32 %a + ret i32 %sel + +if.else: + ret i32 %sub +} diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll index 06ea703..474043a 100644 --- a/test/CodeGen/ARM/sub.ll +++ b/test/CodeGen/ARM/sub.ll @@ -36,3 +36,15 @@ entry: %sel = select i1 %cmp, i32 1, i32 %sub ret i32 %sel } + +; rdar://11726136 +define i32 @f5(i32 %x) { +entry: +; CHECK: f5 +; CHECK: movw r1, #65535 +; CHECK-NOT: movt +; CHECK-NOT: add +; CHECK: sub r0, r0, r1 + %sub = add i32 %x, -65535 + ret i32 %sub +} diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll index 3143387..c403fa5 100644 --- a/test/CodeGen/ARM/thread_pointer.ll +++ b/test/CodeGen/ARM/thread_pointer.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {__aeabi_read_tp} +; RUN: grep "__aeabi_read_tp" define i8* @test() { entry: diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index 28fd469..a25352c 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -3,10 +3,10 @@ define i32 @test(i32 %a, i32 %b) { entry: -; CHECK: movs.w +; CHECK: cmp ; CHECK-NEXT: it mi ; CHECK-NEXT: rsbmi -; CHECK-NEXT: movs.w +; CHECK-NEXT: cmp ; CHECK-NEXT: it mi ; CHECK-NEXT: rsbmi %cmp1 = icmp slt i32 %a, 0 diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll new file mode 100644 index 0000000..a5f3c90 --- /dev/null +++ b/test/CodeGen/ARM/tls-models.ll @@ -0,0 +1,117 @@ +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s + + +@external_gd = external thread_local global i32 +@internal_gd = internal thread_local global i32 42 + +@external_ld = external thread_local(localdynamic) global i32 +@internal_ld = internal thread_local(localdynamic) global i32 42 + +@external_ie = external thread_local(initialexec) global i32 +@internal_ie = internal thread_local(initialexec) global i32 42 + +@external_le = external thread_local(localexec) global i32 +@internal_le = internal thread_local(localexec) global i32 42 + +; ----- no model specified ----- + +define i32* @f1() { +entry: + ret i32* @external_gd + + ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. + ; CHECK-NONPIC: f1: + ; CHECK-NONPIC: external_gd(gottpoff) + ; CHECK-PIC: f1: + ; CHECK-PIC: external_gd(tlsgd) +} + +define i32* @f2() { +entry: + ret i32* @internal_gd + + ; Non-PIC code can use local exec, PIC code can use local dynamic, + ; but that is not implemented, so falls back to general dynamic. + ; CHECK-NONPIC: f2: + ; CHECK-NONPIC: internal_gd(tpoff) + ; CHECK-PIC: f2: + ; CHECK-PIC: internal_gd(tlsgd) +} + + +; ----- localdynamic specified ----- + +define i32* @f3() { +entry: + ret i32* @external_ld + + ; Non-PIC code can use initial exec, PIC should use local dynamic, + ; but that is not implemented, so falls back to general dynamic. 
+ ; CHECK-NONPIC: f3: + ; CHECK-NONPIC: external_ld(gottpoff) + ; CHECK-PIC: f3: + ; CHECK-PIC: external_ld(tlsgd) +} + +define i32* @f4() { +entry: + ret i32* @internal_ld + + ; Non-PIC code can use local exec, PIC code can use local dynamic, + ; but that is not implemented, so it falls back to general dynamic. + ; CHECK-NONPIC: f4: + ; CHECK-NONPIC: internal_ld(tpoff) + ; CHECK-PIC: f4: + ; CHECK-PIC: internal_ld(tlsgd) +} + + +; ----- initialexec specified ----- + +define i32* @f5() { +entry: + ret i32* @external_ie + + ; Non-PIC and PIC code will use initial exec as specified. + ; CHECK-NONPIC: f5: + ; CHECK-NONPIC: external_ie(gottpoff) + ; CHECK-PIC: f5: + ; CHECK-PIC: external_ie(gottpoff) +} + +define i32* @f6() { +entry: + ret i32* @internal_ie + + ; Non-PIC code can use local exec, PIC code use initial exec as specified. + ; CHECK-NONPIC: f6: + ; CHECK-NONPIC: internal_ie(tpoff) + ; CHECK-PIC: f6: + ; CHECK-PIC: internal_ie(gottpoff) +} + + +; ----- localexec specified ----- + +define i32* @f7() { +entry: + ret i32* @external_le + + ; Non-PIC and PIC code will use local exec as specified. + ; CHECK-NONPIC: f7: + ; CHECK-NONPIC: external_le(tpoff) + ; CHECK-PIC: f7: + ; CHECK-PIC: external_le(tpoff) +} + +define i32* @f8() { +entry: + ret i32* @internal_le + + ; Non-PIC and PIC code will use local exec as specified. + ; CHECK-NONPIC: f8: + ; CHECK-NONPIC: internal_le(tpoff) + ; CHECK-PIC: f8: + ; CHECK-PIC: internal_le(tpoff) +} diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll index 1087094..ec4278c 100644 --- a/test/CodeGen/ARM/tls1.ll +++ b/test/CodeGen/ARM/tls1.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {i(tpoff)} +; RUN: grep "i(tpoff)" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {__aeabi_read_tp} +; RUN: grep "__aeabi_read_tp" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ -; RUN: -relocation-model=pic | grep {__tls_get_addr} +; RUN: -relocation-model=pic | grep "__tls_get_addr" @i = thread_local global i32 15 ; <i32*> [#uses=2] diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll index df7a4ca..e0e944f 100644 --- a/test/CodeGen/ARM/tls3.ll +++ b/test/CodeGen/ARM/tls3.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {tbss} +; RUN: grep "tbss" %struct.anon = type { i32, i32 } @teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll new file mode 100644 index 0000000..4e227dd --- /dev/null +++ b/test/CodeGen/ARM/twoaddrinstr.ll @@ -0,0 +1,21 @@ +; Tests for the two-address instruction pass. +; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s + +define void @PR13378() nounwind { +; This was orriginally a crasher trying to schedule the instructions. 
+; CHECK: PR13378: +; CHECK: vldmia +; CHECK-NEXT: vmov.f32 +; CHECK-NEXT: vstmia +; CHECK-NEXT: vstmia +; CHECK-NEXT: vmov.f32 +; CHECK-NEXT: vstmia + +entry: + %0 = load <4 x float>* undef + store <4 x float> zeroinitializer, <4 x float>* undef + store <4 x float> %0, <4 x float>* undef + %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3 + store <4 x float> %1, <4 x float>* undef + unreachable +} diff --git a/test/CodeGen/ARM/unsafe-fsub.ll b/test/CodeGen/ARM/unsafe-fsub.ll new file mode 100644 index 0000000..3a4477d --- /dev/null +++ b/test/CodeGen/ARM/unsafe-fsub.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s +; RUN: llc -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=FAST %s + +target triple = "armv7-apple-ios" + +; SAFE: test +; FAST: test +define float @test(float %x, float %y) { +entry: +; SAFE: vmul.f32 +; SAFE: vsub.f32 +; FAST: mov r0, #0 + %0 = fmul float %x, %y + %1 = fsub float %0, %0 + ret float %1 +} + + diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll index 450f90d..9f55c24 100644 --- a/test/CodeGen/ARM/vcnt.ll +++ b/test/CodeGen/ARM/vcnt.ll @@ -1,79 +1,80 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; NB: this tests vcnt, vclz, and vcls define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { ;CHECK: vcnt8: -;CHECK: vcnt.8 +;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1) + %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { ;CHECK: vcntQ8: -;CHECK: vcnt.8 +;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} %tmp1 = load <16 x i8>* %A - %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1) + %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone +declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { ;CHECK: vclz8: -;CHECK: vclz.i8 +;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}} %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1) + %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0) ret <8 x i8> %tmp2 } define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ;CHECK: vclz16: -;CHECK: vclz.i16 +;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}} %tmp1 = load <4 x i16>* %A - %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1) + %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0) ret <4 x i16> %tmp2 } define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { ;CHECK: vclz32: -;CHECK: vclz.i32 +;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}} %tmp1 = load <2 x i32>* %A - %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1) + %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0) ret <2 x i32> %tmp2 } define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { ;CHECK: vclzQ8: -;CHECK: vclz.i8 +;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}} %tmp1 = load <16 x i8>* %A - %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1) + %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0) ret <16 x i8> %tmp2 } define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { ;CHECK: vclzQ16: -;CHECK: vclz.i16 +;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}} %tmp1 = load <8 x i16>* %A - %tmp2 = 
call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1) + %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0) ret <8 x i16> %tmp2 } define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { ;CHECK: vclzQ32: -;CHECK: vclz.i32 +;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}} %tmp1 = load <4 x i32>* %A - %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1) + %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0) ret <4 x i32> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { ;CHECK: vclss8: diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll index 1ec36da..8fd3db2 100644 --- a/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) { ; CHECK: g: define float @g(<4 x i8>* nocapture %in) { - ; CHECK: vldr +; Note: vld1 here is reasonably important. Mixing VFP and NEON +; instructions is bad on some cores + ; CHECK: vld1 ; CHECK: vmovl.u8 ; CHECK: vmovl.u16 %1 = load <4 x i8>* %in @@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) { ; CHECK: i: define <4 x i8> @i(<4 x i8>* %x) { - ; CHECK: vldr +; Note: vld1 here is reasonably important. Mixing VFP and NEON +; instructions is bad on some cores + ; CHECK: vld1 ; CHECK: vmovl.s8 ; CHECK: vmovl.s16 ; CHECK: vrecpe diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index 61d73c1..c69473f 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -75,12 +75,12 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind { ret <8 x i8> %tmp5 } -define <4 x i16> @vld2dupi16(i16* %A) nounwind { +define <4 x i16> @vld2dupi16(i8* %A) nounwind { ;CHECK: vld2dupi16: ;Check that a power-of-two alignment smaller than the total size of the memory ;being loaded is ignored. ;CHECK: vld2.16 {d16[], d17[]}, [r0] - %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 @@ -94,7 +94,8 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { ;CHECK: vld2dupi16_update: ;CHECK: vld2.16 {d16[], d17[]}, [r1]! 
%A = load i16** %ptr - %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 @@ -105,11 +106,11 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { ret <4 x i16> %tmp5 } -define <2 x i32> @vld2dupi32(i32* %A) nounwind { +define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vld2.32 {d16[], d17[]}, [r0, :64] - %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) + %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1 @@ -119,8 +120,8 @@ define <2 x i32> @vld2dupi32(i32* %A) nounwind { } declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly -declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly -declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -144,11 +145,11 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind { ret <8 x i8> %tmp8 } -define <4 x i16> @vld3dupi16(i16* %A) nounwind { +define <4 x i16> @vld3dupi16(i8* %A) nounwind { ;CHECK: vld3dupi16: ;Check the (default) alignment value. VLD3 does not support alignment. 
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0] - %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8) + %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8) %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1 @@ -161,7 +162,7 @@ define <4 x i16> @vld3dupi16(i16* %A) nounwind { } declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @@ -171,7 +172,8 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { ;CHECK: vld4dupi16_update: ;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]! %A = load i16** %ptr - %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1 @@ -188,12 +190,12 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { ret <4 x i16> %tmp11 } -define <2 x i32> @vld4dupi32(i32* %A) nounwind { +define <2 x i32> @vld4dupi32(i8* %A) nounwind { ;CHECK: vld4dupi32: ;Check the alignment value. An 8-byte alignment is allowed here even though ;it is smaller than the total size of the memory being loaded. 
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64] - %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) + %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1 @@ -208,5 +210,5 @@ define <2 x i32> @vld4dupi32(i32* %A) nounwind { ret <2 x i32> %tmp11 } -declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly -declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 61d89bb..74628f0 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) { %3 = extractelement <8 x i16> %2, i32 0 ret i16 %3 } + +; A constant build_vector created for a vmull with half-width elements must +; not introduce illegal types. <rdar://problem/11324364> +define void @vmull_buildvector() nounwind optsize ssp align 2 { +; CHECK: vmull_buildvector +entry: + br i1 undef, label %for.end179, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.cond.loopexit: ; preds = %for.body33, %for.body + br i1 undef, label %for.end179, label %for.body + +for.body: ; preds = %for.cond.loopexit, %for.body.lr.ph + br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph + +for.body33.lr.ph: ; preds = %for.body + %.sub = select i1 undef, i32 0, i32 undef + br label %for.body33 + +for.body33: ; preds = %for.body33, %for.body33.lr.ph + %add45 = add i32 undef, undef + %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1) + %0 = load i32** undef, align 4 + %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8> + %vmovl.i249 = zext <8 x i8> %1 to <8 x i16> + %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1> + %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8> + %3 = bitcast <16 x i8> undef to <2 x i64> + %vmovl.i237 = zext <8 x i8> undef to <8 x i16> + %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer + %vmovl.i225 = zext <8 x i8> undef to <8 x i16> + %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249 + %vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind + %mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 
80, i16 80, i16 80, i16 80, i16 80, i16 80> + %vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130 + %sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134 + %add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96> + %add.i198 = add <8 x i16> %add.i200, %sub.i203 + %mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237 + %mul.i191 = mul <8 x i16> %vshr_n130, undef + %add.i192 = add <8 x i16> %mul.i191, %mul.i194 + %mul.i187 = mul <8 x i16> %vshr_n134, undef + %add.i188 = add <8 x i16> %mul.i187, %add.i192 + %mul.i185 = mul <8 x i16> undef, undef + %add.i186 = add <8 x i16> %mul.i185, undef + %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %mul.i184 = mul <8 x i16> undef, %vrshr_n160 + %mul.i181 = mul <8 x i16> undef, %vmovl.i225 + %add.i182 = add <8 x i16> %mul.i181, %mul.i184 + %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>) + %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind + %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %5 = bitcast <2 x i64> %shuffle.i to <16 x i8> + store <16 x i8> %5, <16 x i8>* undef, align 16 + %add177 = add nsw i32 undef, 16 + br i1 undef, label %for.body33, label %for.cond.loopexit + +for.end179: ; preds = %for.cond.loopexit, %entry + ret void +} + +declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index e3372a0..f117ab2 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: |
