author    Evan Cheng <evan.cheng@apple.com>  2012-02-07 22:50:41 +0000
committer Evan Cheng <evan.cheng@apple.com>  2012-02-07 22:50:41 +0000
commit    de1df103b9c578d0a1609054a5944342c5d0ba23 (patch)
tree      ebcae362ebc8246f1792d6c2d7792d0b6cf034d3
parent    0ae2510ea00454af29c6fc3f4b012e35d5f5d431 (diff)
Use LEA to adjust stack ptr for Atom. Patch by Andy Zhang.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150008 91177308-0d34-0410-b5e6-96231b3b80d8
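Why LEA: unlike ADD/SUB, LEA computes reg + displacement without writing EFLAGS. The patch relies on exactly that below: the ADD/SUB path marks the instruction's implicit EFLAGS def dead, while the LEA path has no such operand at all, and on Atom's in-order pipeline the flag-free form is reportedly the cheaper way to move the stack pointer. A minimal standalone sketch of the opcode selection this adds to emitSPUpdate follows; the enumerator names track the diff, but the enum and driver are illustrative, not LLVM API, and the real code additionally splits the ri8/ri32 immediate-size forms via getSUBriOpcode/getADDriOpcode.

// Illustrative sketch (not LLVM API) of how emitSPUpdate now picks the
// opcode for a stack-pointer adjustment once FeatureLeaForSP is in play.
#include <cstdint>
#include <cstdio>

enum class SPOpc { SUB32ri, SUB64ri32, ADD32ri, ADD64ri32, LEA32r, LEA64r };

SPOpc pickSPUpdateOpcode(bool Is64Bit, bool UseLEA, int64_t NumBytes) {
  if (UseLEA)
    // lea stackptr, [stackptr + NumBytes]: the sign of the adjustment is
    // carried by the displacement, and EFLAGS is left untouched.
    return Is64Bit ? SPOpc::LEA64r : SPOpc::LEA32r;
  bool IsSub = NumBytes < 0;
  if (IsSub)
    return Is64Bit ? SPOpc::SUB64ri32 : SPOpc::SUB32ri;
  return Is64Bit ? SPOpc::ADD64ri32 : SPOpc::ADD32ri;
}

int main() {
  // Allocating 24 bytes of frame on 32-bit x86:
  //   default:  subl $24, %esp        (clobbers EFLAGS)
  //   lea-sp:   leal -24(%esp), %esp  (EFLAGS untouched)
  std::printf("%d %d\n",
              static_cast<int>(pickSPUpdateOpcode(false, false, -24)),
              static_cast<int>(pickSPUpdateOpcode(false, true, -24)));
}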
-rw-r--r-- lib/Target/X86/X86.td               |  4
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp | 62
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp     |  2
-rw-r--r-- lib/Target/X86/X86Subtarget.h       |  5
4 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index d5db45b..7f2ece7 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -115,6 +115,8 @@ def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
+def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -155,7 +157,7 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem]>;
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureFastUAMem,
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 17884eb..a2e5e35 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -79,6 +79,10 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
+static unsigned getLEArOpcode(unsigned is64Bit) {
+ return is64Bit ? X86::LEA64r : X86::LEA32r;
+}
+
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worry about clobbering it.
@@ -141,13 +145,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ bool Is64Bit, bool UseLEA,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
+ unsigned Opc;
+ if (UseLEA)
+ Opc = getLEArOpcode(Is64Bit);
+ else
+ Opc = isSub
+ ? getSUBriOpcode(Is64Bit, Offset)
+ : getADDriOpcode(Is64Bit, Offset);
+
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -171,13 +180,21 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
+ MachineInstr *MI = NULL;
+
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ StackPtr, false, isSub ? -ThisVal : ThisVal);
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
Offset -= ThisVal;
}
}
@@ -191,7 +208,8 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr) {
if (NumBytes)
*NumBytes += PI->getOperand(2).getImm();
@@ -237,8 +255,8 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -254,7 +272,8 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
int Offset = 0;
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr){
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
@@ -626,6 +645,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
bool IsWin64 = STI.isTargetWin64();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -879,7 +899,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
@@ -891,7 +911,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
@@ -935,6 +955,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -1015,7 +1036,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// We cannot use LEA here, because stack pointer was realigned. We need to
// deallocate local frame back.
if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
+ *RegInfo);
MBBI = prior(LastCSPop);
}
@@ -1036,7 +1058,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1071,7 +1093,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1115,7 +1137,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
}
}
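With this change, mergeSPUpdatesUp and mergeSPUpdates also fold a neighbouring LEA that writes the stack pointer into the pending adjustment. A standalone paraphrase of that fold follows, using a toy Inst type rather than MachineInstr; note that for ADD/SUB the adjustment is the immediate at operand 2, while an LEA's X86 address mode places the displacement at operand 4 (after base, scale, and index), which the sketch flattens into a single Adjust field.

// Illustrative paraphrase (not LLVM API): an adjacent ADD/SUB/LEA that
// defines the stack pointer is folded into the pending adjustment and
// erased. Positive result for ADD/LEA-style updates, negative for SUB.
#include <cstdint>
#include <cstdio>
#include <vector>

enum Opc { ADD32ri, ADD64ri32, SUB32ri, SUB64ri32, LEA32r, LEA64_32r, Other };

struct Inst {
  Opc Opcode;
  unsigned DefReg; // operand 0 in the real MachineInstr
  int64_t Adjust;  // ADD/SUB immediate (operand 2), or LEA displacement
                   // (operand 4 in X86's base/scale/index/disp/segment form)
};

int64_t foldAdjacentSPUpdate(std::vector<Inst> &Block, unsigned StackPtr) {
  if (Block.empty())
    return 0;
  const Inst &PI = Block.back();
  int64_t Offset = 0;
  if ((PI.Opcode == ADD32ri || PI.Opcode == ADD64ri32 ||
       PI.Opcode == LEA32r || PI.Opcode == LEA64_32r) &&
      PI.DefReg == StackPtr)
    Offset = PI.Adjust; // an LEA's displacement already carries its sign
  else if ((PI.Opcode == SUB32ri || PI.Opcode == SUB64ri32) &&
           PI.DefReg == StackPtr)
    Offset = -PI.Adjust;
  else
    return 0;
  Block.pop_back(); // the folded update is deleted, as in the real code
  return Offset;
}

int main() {
  // DefReg 4 stands in for ESP; "leal -8(%esp), %esp" folds to -8.
  std::vector<Inst> MBB = {{LEA32r, 4, -8}};
  std::printf("%lld\n", (long long)foldAdjacentSPUpdate(MBB, 4));
}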
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index a9d95d3..f1ef118 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -257,6 +257,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
// Set processor type. Currently only Atom is detected.
if (Family == 6 && Model == 28) {
X86ProcFamily = IntelAtom;
+ ToggleFeature(X86::FeatureLeaForSP);
}
unsigned MaxExtLevel;
@@ -340,6 +341,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, IsUAMemFast(false)
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
+ , UseLeaForSP(false)
, PostRAScheduler(false)
, stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
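The host autodetect above keys on CPUID signature family 6, model 28, the original Bonnell Atom. A standalone sketch of the same check using GCC/Clang's <cpuid.h> follows; LLVM goes through its own CPUID helpers in this file, so the block is illustrative only, and the feature can presumably also be forced on any CPU with -mattr=+lea-sp.

// Illustrative only: the family/model test AutoDetectSubtargetFeatures
// applies, written against GCC/Clang's <cpuid.h>. x86 hosts only.
#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
    return 1; // CPUID leaf 1 unavailable
  unsigned Family = (EAX >> 8) & 0xf; // bits 8-11
  unsigned Model = (EAX >> 4) & 0xf;  // bits 4-7
  if (Family == 6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff;    // extended family, bits 20-27
    Model += ((EAX >> 16) & 0xf) << 4; // extended model, bits 16-19
  }
  if (Family == 6 && Model == 28)
    std::printf("Intel Atom detected: FeatureLeaForSP would be toggled\n");
  return 0;
}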
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index f930806..9e8b3f9 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -132,6 +132,10 @@ protected:
/// this is true for most x86-64 chips, but not the first AMD chips.
bool HasCmpxchg16b;
+ /// UseLeaForSP - True if the LEA instruction should be used for adjusting
+ /// the stack pointer. This is an optimization for Intel Atom processors.
+ bool UseLeaForSP;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -214,6 +218,7 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+ bool useLeaForSP() const { return UseLeaForSP; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }