aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp173
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h14
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h7
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp81
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h6
-rw-r--r--test/CodeGen/PowerPC/stack-realign.ll151
6 files changed, 391 insertions, 41 deletions
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 8e33830..3b57390 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -26,17 +26,6 @@
using namespace llvm;
-// FIXME This disables some code that aligns the stack to a boundary bigger than
-// the default (16 bytes on Darwin) when there is a stack local of greater
-// alignment. This does not currently work, because the delta between old and
-// new stack pointers is added to offsets that reference incoming parameters
-// after the prolog is generated, and the code that does that doesn't handle a
-// variable delta. You don't want to do that anyway; a better approach is to
-// reserve another register that retains to the incoming stack pointer, and
-// reference parameters relative to that.
-#define ALIGN_STACK 0
-
-
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const uint16_t VRRegNo[] = {
@@ -217,9 +206,12 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
- unsigned MaxAlign = MFI->getMaxAlignment();
unsigned TargetAlign = getStackAlignment();
- unsigned AlignMask = TargetAlign - 1; //
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -235,7 +227,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ !RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
MFI->setStackSize(0);
@@ -332,6 +324,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -358,6 +352,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Work out frame sizes.
unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (!isInt<32>(NegFrameSize))
+ llvm_unreachable("Unhandled stack size!");
if (MFI->isFrameAddressTaken())
replaceFPWithRealFP(MF);
@@ -372,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
@@ -387,6 +384,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ int BPOffset = 0;
+ if (HasBP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI,
+ HasFP);
+ }
+ }
+
if (isPPC64) {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
@@ -404,6 +415,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addImm(FPOffset)
.addReg(PPC::X1);
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(HasFP ? PPC::X30 : PPC::X31)
+ .addImm(BPOffset)
+ .addReg(PPC::X1);
+
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
.addReg(PPC::X0)
@@ -427,6 +444,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addImm(FPOffset)
.addReg(PPC::R1);
+ if (HasBP)
+ // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
+ // offsets of R1 is not allowed.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(HasFP ? PPC::R30 : PPC::R31)
+ .addImm(BPOffset)
+ .addReg(PPC::R1);
+
assert(MustSaveCRs.empty() &&
"Prologue CR saving supported only in 64-bit mode");
@@ -441,26 +466,44 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (!FrameSize) return;
// Get stack alignments.
- unsigned TargetAlign = getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
if (!isPPC64) {
// PPC32.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
+
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR),
+ HasFP ? PPC::R30 : PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ }
+
+ if (HasBP && MaxAlign > 1) {
assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
"Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
.addReg(PPC::R1)
.addImm(0)
.addImm(32 - Log2_32(MaxAlign))
.addImm(31);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(NegFrameSize);
+ if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12)
+ .addReg(PPC::R12, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addReg(PPC::R12, RegState::Kill);
+ }
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
.addReg(PPC::R1)
@@ -482,18 +525,36 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R0);
}
} else { // PPC64.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8),
+ HasFP ? PPC::X30 : PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (HasBP && MaxAlign > 1) {
assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
"Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
.addReg(PPC::X1)
.addImm(0)
.addImm(64 - Log2_32(MaxAlign));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
- .addReg(PPC::X0)
- .addImm(NegFrameSize);
+ if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12)
+ .addReg(PPC::X12, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X12, RegState::Kill);
+ }
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
.addReg(PPC::X1)
@@ -535,6 +596,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
}
+ if (HasBP) {
+ unsigned Reg = isPPC64 ? (HasFP ? PPC::X30 : PPC::X31) :
+ (HasFP ? PPC::R30 : PPC::R31);
+ Reg = MRI->getDwarfRegNum(Reg, true);
+ MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
+ }
+
if (MustSaveLR) {
unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR;
Reg = MRI->getDwarfRegNum(Reg, true);
@@ -614,6 +683,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -629,8 +700,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get alignment info so we know how to restore r1
const MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned TargetAlign = getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
// Get the number of bytes allocated from the FrameInfo.
int FrameSize = MFI->getStackSize();
@@ -645,6 +714,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
@@ -660,6 +730,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+ int BPOffset = 0;
+ if (HasBP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI,
+ HasFP);
+ }
+ }
+
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
RetOpcode == PPC::TCRETURNdi ||
RetOpcode == PPC::TCRETURNai ||
@@ -704,7 +788,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(PPC::R31)
.addReg(PPC::R0);
} else if (isInt<16>(FrameSize) &&
- (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !HasBP &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
.addReg(PPC::R1).addImm(FrameSize);
@@ -727,7 +811,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(PPC::X1)
.addReg(PPC::X31)
.addReg(PPC::X0);
- } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ } else if (isInt<16>(FrameSize) && !HasBP &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X1).addImm(FrameSize);
@@ -751,6 +835,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
.addImm(FPOffset).addReg(PPC::X1);
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), HasFP ? PPC::X30 : PPC::X31)
+ .addImm(BPOffset).addReg(PPC::X1);
+
if (!MustSaveCRs.empty())
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
@@ -770,6 +858,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
.addImm(FPOffset).addReg(PPC::R1);
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), HasFP ? PPC::R30 : PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
}
@@ -848,7 +940,8 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -873,6 +966,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FI->setFramePointerSaveIndex(FPSI);
}
+ int BPSI = FI->getBasePointerSaveIndex();
+ if (!BPSI && RegInfo->hasBasePointer(MF)) {
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, needsFP(MF));
+ // Allocate the frame index for the base pointer save area.
+ BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
+ // Save the result.
+ FI->setBasePointerSaveIndex(BPSI);
+ }
+
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
@@ -1004,6 +1106,17 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (RegInfo->hasBasePointer(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getBasePointerSaveIndex();
+ assert(FI && "No Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
// General register save area starts right below the Floating-point
// register save area.
if (HasGPSaveArea || HasG8SaveArea) {
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 6f5f936..9acf129 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -94,6 +94,20 @@ public:
return isPPC64 ? -8U : -4U;
}
+ /// getBasePointerSaveOffset - Return the previous frame offset to save the
+ /// base pointer.
+ static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI,
+ bool hasFP) {
+ if (!hasFP)
+ return getFramePointerSaveOffset(isPPC64, isDarwinABI);
+
+ if (isDarwinABI)
+ return isPPC64 ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -16U : -8U;
+ }
+
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 3b2ac3b..33f843d 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -32,6 +32,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
///
int ReturnAddrSaveIndex;
+ /// Frame index where the old base pointer is stored.
+ int BasePointerSaveIndex;
+
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
/// PEI.
@@ -93,6 +96,7 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ BasePointerSaveIndex(0),
HasSpills(false),
HasNonRISpills(false),
SpillsCR(false),
@@ -113,6 +117,9 @@ public:
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+ int getBasePointerSaveIndex() const { return BasePointerSaveIndex; }
+ void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; }
+
unsigned getMinReservedArea() const { return MinReservedArea; }
void setMinReservedArea(unsigned size) { MinReservedArea = size; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 8a0954c..49de8da 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -48,6 +48,14 @@
using namespace llvm;
+static cl::opt<bool>
+EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+static cl::opt<bool>
+AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
+ cl::desc("Force the use of a base pointer in every function"));
+
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
@@ -170,18 +178,28 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- if (PPCFI->needsFP(MF))
+ if (PPCFI->needsFP(MF) || hasBasePointer(MF)) {
Reserved.set(PPC::X31);
+ // If we need a base pointer, and we also have a frame pointer, then use
+ // r30 as the base pointer.
+ if (PPCFI->needsFP(MF) && hasBasePointer(MF))
+ Reserved.set(PPC::X30);
+ }
+
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
}
- if (PPCFI->needsFP(MF))
+ if (PPCFI->needsFP(MF) || hasBasePointer(MF)) {
Reserved.set(PPC::R31);
+ if (PPCFI->needsFP(MF) && hasBasePointer(MF))
+ Reserved.set(PPC::R30);
+ }
+
// Reserve Altivec registers when Altivec is unavailable.
if (!Subtarget.hasAltivec())
for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
@@ -524,7 +542,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
@@ -562,12 +579,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
-
- bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
- (is64Bit ? PPC::X31 : PPC::R31) :
- (is64Bit ? PPC::X1 : PPC::R1),
- false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(
+ FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
// Figure out if the offset in the instruction is shifted right two bits.
bool isIXAddr = usesIXAddr(MI);
@@ -586,8 +599,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
- Offset += MFI->getStackSize();
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) {
+ if (!(hasBasePointer(MF) && FrameIndex < 0))
+ Offset += MFI->getStackSize();
+ }
// If we can, encode the offset directly into the instruction. If this is a
// normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
@@ -605,6 +620,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
+ bool is64Bit = Subtarget.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -658,6 +674,49 @@ unsigned PPCRegisterInfo::getEHHandlerRegister() const {
return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
}
+unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!hasBasePointer(MF))
+ return getFrameRegister(MF);
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R30 : PPC::R31;
+ else
+ return TFI->hasFP(MF) ? PPC::X30 : PPC::X31;
+}
+
+bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ if (!EnableBasePointer)
+ return false;
+ if (AlwaysBasePointer)
+ return true;
+
+ // If we need to realign the stack, then the stack pointer can no longer
+ // serve as an offset into the caller's stack space. As a result, we need a
+ // base pointer.
+ return needsStackRealignment(MF);
+}
+
+bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+
+ return true;
+}
+
+bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
/// Returns true if the instruction's frame index
/// reference would be better served by a base register other than FP
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 93626a9..d02af9e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -92,6 +92,12 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
+ // Base pointer (stack realignment) support.
+ unsigned getBaseRegister(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
new file mode 100644
index 0000000..7bd28f6
--- /dev/null
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -0,0 +1,151 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -disable-fp-elim < %s | FileCheck -check-prefix=CHECK-FP %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s = type { i32, i32 }
+
+declare void @bar(i32*)
+
+define void @goo(%struct.s* byval nocapture readonly %a) {
+entry:
+ %x = alloca [2 x i32], align 32
+ %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+ %0 = load i32* %a1, align 4, !tbaa !0
+ %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+ store i32 %0, i32* %arrayidx, align 32, !tbaa !0
+ %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+ %1 = load i32* %b, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+ store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+ call void @bar(i32* %arrayidx)
+ ret void
+}
+
+; CHECK-LABEL: @goo
+
+; CHECK-DAG: mflr 0
+; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: std 31, -8(1)
+; CHECK-DAG: mr 31, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfic 0, [[REG]], -160
+; CHECK: stdux 1, 1, 0
+
+; CHECK: .cfi_offset r31, -8
+; CHECK: .cfi_offset lr, 16
+
+; CHECK: std 3, 48(31)
+
+; CHECK: ld 1, 0(1)
+; CHECK-DAG: ld 0, 16(1)
+; CHECK-DAG: ld 31, -8(1)
+; CHECK-DAG: mtlr 0
+; CHECK: blr
+
+; CHECK-FP-LABEL: @goo
+
+; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: std 31, -8(1)
+; CHECK-FP-DAG: std 30, -16(1)
+; CHECK-FP-DAG: mr 30, 1
+; CHECK-FP-DAG: std 0, 16(1)
+; CHECK-FP-DAG: subfic 0, [[REG]], -160
+; CHECK-FP: stdux 1, 1, 0
+
+; CHECK-FP: .cfi_offset r31, -8
+; CHECK-FP: .cfi_offset r30, -16
+; CHECK-FP: .cfi_offset lr, 16
+
+; CHECK-FP: mr 31, 1
+
+; CHECK-FP: std 3, 48(30)
+
+; CHECK-FP: ld 1, 0(1)
+; CHECK-FP-DAG: ld 0, 16(1)
+; CHECK-FP-DAG: ld 31, -8(1)
+; CHECK-FP-DAG: ld 30, -16(1)
+; CHECK-FP-DAG: mtlr 0
+; CHECK-FP: blr
+
+; The large-frame-size case.
+define void @hoo(%struct.s* byval nocapture readonly %a) {
+entry:
+ %x = alloca [200000 x i32], align 32
+ %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+ %0 = load i32* %a1, align 4, !tbaa !0
+ %arrayidx = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 0
+ store i32 %0, i32* %arrayidx, align 32, !tbaa !0
+ %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+ %1 = load i32* %b, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 1
+ store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+ call void @bar(i32* %arrayidx)
+ ret void
+}
+
+; CHECK-LABEL: @hoo
+
+; CHECK-DAG: lis [[REG1:[0-9]+]], -13
+; CHECK-DAG: rldicl [[REG3:[0-9]+]], 1, 0, 59
+; CHECK-DAG: mflr 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51808
+; CHECK-DAG: std 31, -8(1)
+; CHECK-DAG: mr 31, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfc 0, [[REG3]], [[REG2]]
+; CHECK: stdux 1, 1, 0
+
+; CHECK: blr
+
+; Make sure that the FP save area is still allocated correctly relative to
+; where r30 is saved.
+define void @loo(%struct.s* byval nocapture readonly %a) {
+entry:
+ %x = alloca [2 x i32], align 32
+ %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+ %0 = load i32* %a1, align 4, !tbaa !0
+ %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+ store i32 %0, i32* %arrayidx, align 32, !tbaa !0
+ %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+ %1 = load i32* %b, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+ store i32 %1, i32* %arrayidx2, align 4, !tbaa !0
+ call void @bar(i32* %arrayidx)
+ call void asm sideeffect "", "~{f30}"() nounwind
+ ret void
+}
+
+; CHECK-LABEL: @loo
+
+; CHECK-DAG: mflr 0
+; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: std 31, -24(1)
+; CHECK-DAG: mr 31, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfic 0, [[REG]], -160
+; CHECK: stdux 1, 1, 0
+
+; CHECK: stfd 30, -16(31)
+
+; CHECK: blr
+
+; CHECK-FP-LABEL: @loo
+
+; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: std 31, -24(1)
+; CHECK-FP-DAG: std 30, -32(1)
+; CHECK-FP-DAG: mr 30, 1
+; CHECK-FP-DAG: std 0, 16(1)
+; CHECK-FP-DAG: subfic 0, [[REG]], -192
+; CHECK-FP: stdux 1, 1, 0
+
+; CHECK-FP: stfd 30, -16(30)
+
+; CHECK-FP: blr
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}