aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDale Johannesen <dalej@apple.com>2010-06-18 19:00:18 +0000
committerDale Johannesen <dalej@apple.com>2010-06-18 19:00:18 +0000
commitc66cdf74a9f1ee12cb9bff39cbd6bc518fbc2d3e (patch)
tree202dd9a0b8b2c93ff8c1f1b5a0c22641bf9517d2
parent71f095b20a2b1710d35b81fced4ae8b2ca1a6f61 (diff)
downloadexternal_llvm-c66cdf74a9f1ee12cb9bff39cbd6bc518fbc2d3e.zip
external_llvm-c66cdf74a9f1ee12cb9bff39cbd6bc518fbc2d3e.tar.gz
external_llvm-c66cdf74a9f1ee12cb9bff39cbd6bc518fbc2d3e.tar.bz2
Enable tail calls on ARM by default, with some
basic tests. This has been well tested on Darwin but not elsewhere. It should work provided the linker correctly resolves B.W <label in other function> which it has not seen before, at least from llvm-based compilers. I'm leaving the arm-tail-calls switch in until I see if there's any problems because of that; it might need to be disabled for some environments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106299 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp2
-rw-r--r--test/CodeGen/ARM/call-tc.ll36
-rw-r--r--test/CodeGen/ARM/ifcvt6-tc.ll23
-rw-r--r--test/CodeGen/ARM/insn-sched1-tc.ll11
-rw-r--r--test/CodeGen/ARM/ldm-tc.ll37
-rw-r--r--test/CodeGen/Thumb2/thumb2-call-tc.ll27
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll86
7 files changed, 221 insertions, 1 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 50258c6..216ba65 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,7 +55,7 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(false));
+ cl::init(true));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
new file mode 100644
index 0000000..8103fab
--- /dev/null
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; CHECKELF: PLT
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @g.upgrd.1() {
+; CHECKV4: bx r0 @ TAILCALL
+; CHECKV5: bx r0 @ TAILCALL
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
+
+define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
+; CHECKV4: m_231b
+; CHECKV4: bx r{{.*}}
+BB0:
+ %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
+ %t35 = volatile load i32* %5 ; <i32> [#uses=1]
+ %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
+ %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
+ %8 = load i32** %7 ; <i32*> [#uses=1]
+ %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
+ %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
+ ret i32* %10
+}
diff --git a/test/CodeGen/ARM/ifcvt6-tc.ll b/test/CodeGen/ARM/ifcvt6-tc.ll
new file mode 100644
index 0000000..5b28804
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt6-tc.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep cmpne | count 1
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep bhi | count 1
+; Here, tail call wins over eliminating branches. It is 1 fewer instruction
+; and removes all stack accesses, so seems like a win.
+
+define void @foo(i32 %X, i32 %Y) {
+entry:
+ %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
+ %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
+ %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
+ br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/ARM/insn-sched1-tc.ll b/test/CodeGen/ARM/insn-sched1-tc.ll
new file mode 100644
index 0000000..c457c8c
--- /dev/null
+++ b/test/CodeGen/ARM/insn-sched1-tc.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: grep mov | count 2
+
+define i32 @test(i32 %x) {
+ %tmp = trunc i32 %x to i16 ; <i16> [#uses=1]
+ %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+declare i32 @f(i32, i16)
diff --git a/test/CodeGen/ARM/ldm-tc.ll b/test/CodeGen/ARM/ldm-tc.ll
new file mode 100644
index 0000000..3819192
--- /dev/null
+++ b/test/CodeGen/ARM/ldm-tc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: ldmib
+; CHECK: b.w _f2 @ TAILCALL
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll
new file mode 100644
index 0000000..d31ae0c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() {
+; DARWIN: f:
+; DARWIN: blx _g
+
+; LINUX: f:
+; LINUX: bl g
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @h() {
+; DARWIN: h:
+; DARWIN: bx r0 @ TAILCALL
+
+; LINUX: h:
+; LINUX: bx r0 @ TAILCALL
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
new file mode 100644
index 0000000..c024415
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: cmpne
+ switch i32 %c, label %cond_next [
+ i32 1, label %cond_true
+ i32 7, label %cond_true
+ ]
+
+cond_true:
+ %tmp12 = add i32 %a, 1
+ %tmp1518 = add i32 %tmp12, %b
+ ret i32 %tmp1518
+
+cond_next:
+ %tmp15 = add i32 %b, %a
+ ret i32 %tmp15
+}
+
+; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
+define i32 @t2(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: ite gt
+; CHECK: subgt
+; CHECK: suble
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define void @foo(i32 %a) nounwind {
+entry:
+ %tmp = load i32** @x ; <i32*> [#uses=1]
+ store i32 %a, i32* %tmp
+ ret void
+}
+
+; Tail call prevents use of ifcvt in this one. Seems like a win though.
+define void @t3(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: t3:
+; CHECK-NOT: it lt
+; CHECK-NOT: poplt
+; CHECK: b.w _foo @ TAILCALL
+ %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ tail call void @foo( i32 %b )
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}