aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-04-26 21:31:35 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-04-26 21:31:35 +0000
commit554daa67bd1c4f01fb7a00f2f4255a52b81e9fa3 (patch)
tree94e9a50d8a25d4072cacd8cf9b2634461838e079
parent90fab0f9d8e275f26f2e58bd5aaf9a3ac389dfaa (diff)
downloadexternal_llvm-554daa67bd1c4f01fb7a00f2f4255a52b81e9fa3.zip
external_llvm-554daa67bd1c4f01fb7a00f2f4255a52b81e9fa3.tar.gz
external_llvm-554daa67bd1c4f01fb7a00f2f4255a52b81e9fa3.tar.bz2
Be careful about scheduling nodes above previous calls. It increase usages of
more callee-saved registers and introduce copies. Only allows it if scheduling a node above calls would end up lessen register pressure. Call operands also has added ABI restrictions for register allocation, so be extra careful with hoisting them above calls. rdar://9329627 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130245 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h7
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp43
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp19
-rw-r--r--test/CodeGen/ARM/2011-04-26-SchedTweak.ll70
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll2
-rw-r--r--test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll20
-rw-r--r--test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll2
-rw-r--r--test/CodeGen/X86/pic.ll2
8 files changed, 138 insertions, 27 deletions
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 4e0971f..2eb3db3 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -252,6 +252,7 @@ namespace llvm {
unsigned short Latency; // Node latency.
bool isVRegCycle : 1; // May use and def the same vreg.
bool isCall : 1; // Is a function call.
+ bool isCallOp : 1; // Is a function call operand.
bool isTwoAddress : 1; // Is a two-address instruction.
bool isCommutable : 1; // Is a commutable instruction.
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
@@ -280,7 +281,7 @@ namespace llvm {
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isTwoAddress(false),
+ isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -294,7 +295,7 @@ namespace llvm {
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isTwoAddress(false),
+ isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -307,7 +308,7 @@ namespace llvm {
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isTwoAddress(false),
+ isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 5f28cdb..88bd450 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1732,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// If SU does not have a register def, schedule it close to its uses
// because it does not lengthen any live ranges.
return 0;
+#if 1
return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
}
//===----------------------------------------------------------------------===//
@@ -2238,11 +2248,35 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allows it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
if (LPriority != RPriority) {
DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
}
+ // One or both of the nodes are calls and their sethi-ullman numbers are the
+ // same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
// t1 = op t2, c1
@@ -2275,7 +2309,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return LScratch > RScratch;
}
- if (!DisableSchedCycles) {
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 94b8c2f..9f2f012 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -83,6 +83,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -285,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
+ SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
@@ -337,6 +339,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
if (!HasGlueUse) break;
}
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
// Schedule zero-latency TokenFactor below any nodes that may increase the
// schedule height. Otherwise, ancestors of the TokenFactor may appear to
// have false stalls.
@@ -356,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
}
void ScheduleDAGSDNodes::AddSchedEdges() {
diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
new file mode 100644
index 0000000..ed7dd03
--- /dev/null
+++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+
+; Do not move the umull above previous call which would require use of
+; more callee-saved registers and introduce copies.
+; rdar://9329627
+
+%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
+%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
+
+@FuncPtr = external hidden unnamed_addr global %struct.FF*
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
+@G = external unnamed_addr global i32
+@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
+@.str3 = external hidden unnamed_addr constant [58 x i8], align 4
+
+define i32 @test() nounwind optsize ssp {
+entry:
+; CHECK: test:
+; CHECK: push
+; CHECK-NOT: push
+ %block_size = alloca i32, align 4
+ %block_count = alloca i32, align 4
+ %index_cache = alloca i32, align 4
+ store i32 0, i32* %index_cache, align 4
+ %tmp = load i32* @G, align 4
+ %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
+ switch i32 %tmp1, label %bb8 [
+ i32 0, label %bb
+ i32 536870913, label %bb4
+ i32 536870914, label %bb6
+ ]
+
+bb:
+ %tmp2 = load i32* @G, align 4
+ %tmp4 = icmp eq i32 %tmp2, 0
+ br i1 %tmp4, label %bb1, label %bb8
+
+bb1:
+; CHECK: %bb1
+; CHECK-NOT: umull
+; CHECK: blx _Get
+; CHECK: umull
+; CHECK: blx _foo
+ %tmp5 = load i32* %block_size, align 4
+ %tmp6 = load i32* %block_count, align 4
+ %tmp7 = call %struct.FF* @Get() nounwind
+ store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
+ %tmp10 = zext i32 %tmp6 to i64
+ %tmp11 = zext i32 %tmp5 to i64
+ %tmp12 = mul nsw i64 %tmp10, %tmp11
+ %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+ br label %bb8
+
+bb4:
+ ret i32 0
+
+bb6:
+ ret i32 1
+
+bb8:
+ ret i32 -1
+}
+
+declare i32 @printf(i8*, ...)
+
+declare %struct.FF* @Get()
+
+declare i32 @foo(i8*, i64, i32)
+
+declare i32 @bar(i32, i32, i32)
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index be891bc..f0e2d10 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -58,7 +58,7 @@ entry:
; A8: str r2, [r0, r1, lsl #2]
; A9: test4:
-; A9: add r0, r0, r4, lsl #2
+; A9: add r0, r0, r{{[0-9]+}}, lsl #2
; A9: ldr r1, [r0]
; A9: str r1, [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
diff --git a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
deleted file mode 100644
index fad2669..0000000
--- a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s
-
-target triple = "thumbv6-apple-darwin10"
-
-@fred = internal global i32 0 ; <i32*> [#uses=1]
-
-define void @foo() nounwind {
-entry:
-; CHECK: str r0, [sp
- %0 = call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
-; CHECK: blx _bar
-; CHECK: ldr r1, [sp
- store i32 %0, i32* @fred, align 4
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-declare i32 @bar(...)
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 789a891..3594424 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -13,7 +13,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<
; CHECK: _ZNKSs7compareERKSs:
; CHECK: it eq
; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: ldmia.w sp!,
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
%1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index dc5fcd7..fb60ac2 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -79,8 +79,8 @@ entry:
; LINUX-NEXT: .L3$pb:
; LINUX: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
-; LINUX: movl pfoo@GOT(%[[REG3]]),
; LINUX: calll afoo@PLT
+; LINUX: movl pfoo@GOT(%[[REG3]]),
; LINUX: calll *
}