aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-02-24 01:19:29 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-02-24 01:19:29 +0000
commit: c54f6348861517398f17e85f41b30c4dd079fc3d (patch)
tree: 89724d722ff842bf01ca2fe55fc82f4d703f9c98
parent: d1b220a33b16d97ef4194b4da625ba1c57974dee (diff)
download: external_llvm-c54f6348861517398f17e85f41b30c4dd079fc3d.zip
external_llvm-c54f6348861517398f17e85f41b30c4dd079fc3d.tar.gz
external_llvm-c54f6348861517398f17e85f41b30c4dd079fc3d.tar.bz2
Switch ARM target to register masks.
I'll let the buildbots determine the compile time improvements from this change, but 464.h264ref has 5% faster codegen at -O2.

This patch does cause some assembly changes. Branch folding can make different decisions about calls with dead return values. CriticalAntiDepBreaker may choose different registers because its liveness tracking is affected. MachineCopyPropagation may sometimes leave a dead copy behind.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151331 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp 8
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp 6
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td 24
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td 18
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td 4
5 files changed, 25 insertions, 35 deletions
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 458bb1d..c7622f8 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2151,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -2283,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I,
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 94ddf86..aa51ced 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1600,6 +1600,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 798cf52..dae09c7 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1898,18 +1898,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
-// All calls clobber the non-callee saved registers. SP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
let isCall = 1,
- // On non-IOS platforms R9 is callee-saved.
// FIXME: Do we really need a non-predicated version? If so, it should
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
- Defs = [R0, R1, R2, R3, R12, LR,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
- CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
@@ -1964,10 +1959,7 @@ let isCall = 1,
// On IOS R9 is call-clobbered.
// R7 is marked as a use to prevent frame-pointer assignments from being
// moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
- CPSR, FPSCR],
- Uses = [R7, SP] in {
+ Defs = [LR], Uses = [R7, SP] in {
def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
4, IIC_Br,
[(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
@@ -2071,9 +2063,7 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
- let Defs = [R0, R1, R2, R3, R9, R12,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
- Uses = [SP] in {
+ let Uses = [SP] in {
def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
IIC_Br, []>, Requires<[IsIOS]>;
@@ -2093,9 +2083,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
// Non-IOS versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
- Uses = [SP] in {
+ let Uses = [SP] in {
def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
IIC_Br, []>, Requires<[IsNotIOS]>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 9019e8c..ba1791b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -406,10 +406,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// potentially appearing dead.
let isCall = 1,
// On non-IOS platforms R9 is callee-saved.
- Defs = [R0, R1, R2, R3, R12, LR,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
- CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
@@ -460,10 +457,7 @@ let isCall = 1,
// On IOS R9 is call-clobbered.
// R7 is marked as a use to prevent frame-pointer assignments from being
// moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
- CPSR, FPSCR],
- Uses = [R7, SP] in {
+ Defs = [LR], Uses = [R7, SP] in {
// Also used for Thumb2
def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
@@ -529,9 +523,7 @@ let isBranch = 1, isTerminator = 1 in
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS versions.
- let Defs = [R0, R1, R2, R3, R9, R12,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
- Uses = [SP] in {
+ let Uses = [SP] in {
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
// on IOS), so it's in ARMInstrThumb2.td.
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
@@ -540,9 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
Requires<[IsThumb, IsIOS]>;
}
// Non-IOS versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
- Uses = [SP] in {
+ let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
(ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 555d876..542996d 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3260,9 +3260,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// it goes here.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
- let Defs = [R0, R1, R2, R3, R9, R12, PC,
- Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- Uses = [SP] in
+ let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],