diff options
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.cpp | 173 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.h | 14 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCMachineFunctionInfo.h | 7 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.cpp | 81 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.h | 6 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/stack-realign.ll | 151 |
6 files changed, 391 insertions, 41 deletions
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 8e33830..3b57390 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -26,17 +26,6 @@ using namespace llvm; -// FIXME This disables some code that aligns the stack to a boundary bigger than -// the default (16 bytes on Darwin) when there is a stack local of greater -// alignment. This does not currently work, because the delta between old and -// new stack pointers is added to offsets that reference incoming parameters -// after the prolog is generated, and the code that does that doesn't handle a -// variable delta. You don't want to do that anyway; a better approach is to -// reserve another register that retains to the incoming stack pointer, and -// reference parameters relative to that. -#define ALIGN_STACK 0 - - /// VRRegNo - Map from a numbered VR register to its enum value. /// static const uint16_t VRRegNo[] = { @@ -217,9 +206,12 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. - unsigned MaxAlign = MFI->getMaxAlignment(); unsigned TargetAlign = getStackAlignment(); - unsigned AlignMask = TargetAlign - 1; // + unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; + + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need @@ -235,7 +227,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. - (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. 
+ !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) MFI->setStackSize(0); @@ -332,6 +324,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); @@ -358,6 +352,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Work out frame sizes. unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (!isInt<32>(NegFrameSize)) + llvm_unreachable("Unhandled stack size!"); if (MFI->isFrameAddressTaken()) replaceFPWithRealFP(MF); @@ -372,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); + bool HasBP = RegInfo->hasBasePointer(MF); int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); @@ -387,6 +384,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { } } + int BPOffset = 0; + if (HasBP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = FFI->getObjectOffset(BPIndex); + } else { + BPOffset = + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, + HasFP); + } + } + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); @@ -404,6 +415,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::X1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(HasFP ? 
PPC::X30 : PPC::X31) + .addImm(BPOffset) + .addReg(PPC::X1); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X0) @@ -427,6 +444,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::R1); + if (HasBP) + // FIXME: On PPC32 SVR4, BPOffset is negative and access to negative + // offsets of R1 is not allowed. + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(HasFP ? PPC::R30 : PPC::R31) + .addImm(BPOffset) + .addReg(PPC::R1); + assert(MustSaveCRs.empty() && "Prologue CR saving supported only in 64-bit mode"); @@ -441,26 +466,44 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (!FrameSize) return; // Get stack alignments. - unsigned TargetAlign = getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now if (!isPPC64) { // PPC32. - if (ALIGN_STACK && MaxAlign > TargetAlign) { + + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), + HasFP ? 
PPC::R30 : PPC::R31) + .addReg(PPC::R1) + .addReg(PPC::R1); + } + + if (HasBP && MaxAlign > 1) { assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) .addReg(PPC::R1) .addImm(0) .addImm(32 - Log2_32(MaxAlign)) .addImm(31); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize); + if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12) + .addReg(PPC::R12, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addReg(PPC::R12, RegState::Kill); + } BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) .addReg(PPC::R1) @@ -482,18 +525,36 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0); } } else { // PPC64. - if (ALIGN_STACK && MaxAlign > TargetAlign) { + if (HasBP) { + // Save a copy of r1 as the base pointer. + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), + HasFP ? 
PPC::X30 : PPC::X31) + .addReg(PPC::X1) + .addReg(PPC::X1); + } + + if (HasBP && MaxAlign > 1) { assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) .addReg(PPC::X1) .addImm(0) .addImm(64 - Log2_32(MaxAlign)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) - .addReg(PPC::X0) - .addImm(NegFrameSize); + if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addImm(NegFrameSize); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12) + .addReg(PPC::X12, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addReg(PPC::X12, RegState::Kill); + } BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) .addReg(PPC::X1) @@ -535,6 +596,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); } + if (HasBP) { + unsigned Reg = isPPC64 ? (HasFP ? PPC::X30 : PPC::X31) : + (HasFP ? PPC::R30 : PPC::R31); + Reg = MRI->getDwarfRegNum(Reg, true); + MMI.addFrameInst( + MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); + } + if (MustSaveLR) { unsigned Reg = isPPC64 ? 
PPC::LR8 : PPC::LR; Reg = MRI->getDwarfRegNum(Reg, true); @@ -614,6 +683,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(MBBI != MBB.end() && "Returning block has no terminator"); const PPCInstrInfo &TII = *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl; @@ -629,8 +700,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get alignment info so we know how to restore r1 const MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned TargetAlign = getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); // Get the number of bytes allocated from the FrameInfo. int FrameSize = MFI->getStackSize(); @@ -645,6 +714,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? 
bool HasFP = hasFP(MF); + bool HasBP = RegInfo->hasBasePointer(MF); int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); @@ -660,6 +730,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } + int BPOffset = 0; + if (HasBP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = FFI->getObjectOffset(BPIndex); + } else { + BPOffset = + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI, + HasFP); + } + } + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || RetOpcode == PPC::TCRETURNdi || RetOpcode == PPC::TCRETURNai || @@ -704,7 +788,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(PPC::R31) .addReg(PPC::R0); } else if (isInt<16>(FrameSize) && - (!ALIGN_STACK || TargetAlign >= MaxAlign) && + !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) .addReg(PPC::R1).addImm(FrameSize); @@ -727,7 +811,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(PPC::X1) .addReg(PPC::X31) .addReg(PPC::X0); - } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign && + } else if (isInt<16>(FrameSize) && !HasBP && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) .addReg(PPC::X1).addImm(FrameSize); @@ -751,6 +835,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) .addImm(FPOffset).addReg(PPC::X1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), HasFP ? 
PPC::X30 : PPC::X31) + .addImm(BPOffset).addReg(PPC::X1); + if (!MustSaveCRs.empty()) for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) @@ -770,6 +858,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) .addImm(FPOffset).addReg(PPC::R1); + if (HasBP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), HasFP ? PPC::R30 : PPC::R31) + .addImm(BPOffset).addReg(PPC::R1); // BP save slot, not the FP slot. + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); } @@ -848,7 +940,8 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -873,6 +966,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, FI->setFramePointerSaveIndex(FPSI); } + int BPSI = FI->getBasePointerSaveIndex(); + if (!BPSI && RegInfo->hasBasePointer(MF)) { + int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, needsFP(MF)); + // Allocate the frame index for the base pointer save area. + BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); + // Save the result. + FI->setBasePointerSaveIndex(BPSI); + } + // Reserve stack space to move the linkage area to in case of a tail call. 
int TCSPDelta = 0; if (MF.getTarget().Options.GuaranteedTailCallOpt && @@ -1004,6 +1106,17 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + if (RegInfo->hasBasePointer(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getBasePointerSaveIndex(); + assert(FI && "No Base Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + // General register save area starts right below the Floating-point // register save area. if (HasGPSaveArea || HasG8SaveArea) { diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 6f5f936..9acf129 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -94,6 +94,20 @@ public: return isPPC64 ? -8U : -4U; } + /// getBasePointerSaveOffset - Return the previous frame offset to save the + /// base pointer. + static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI, + bool hasFP) { + if (!hasFP) + return getFramePointerSaveOffset(isPPC64, isDarwinABI); + + if (isDarwinABI) + return isPPC64 ? -16U : -8U; + + // SVR4 ABI: First slot in the general register save area. + return isPPC64 ? -16U : -8U; + } + /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 3b2ac3b..33f843d 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -32,6 +32,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// int ReturnAddrSaveIndex; + /// Frame index where the old base pointer is stored. 
+ int BasePointerSaveIndex; + /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current /// function. This is only valid after the initial scan of the function by /// PEI. @@ -93,6 +96,7 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + BasePointerSaveIndex(0), HasSpills(false), HasNonRISpills(false), SpillsCR(false), @@ -113,6 +117,9 @@ public: int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } + int getBasePointerSaveIndex() const { return BasePointerSaveIndex; } + void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; } + unsigned getMinReservedArea() const { return MinReservedArea; } void setMinReservedArea(unsigned size) { MinReservedArea = size; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8a0954c..49de8da 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -48,6 +48,14 @@ using namespace llvm; +static cl::opt<bool> +EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + +static cl::opt<bool> +AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false), + cl::desc("Force the use of a base pointer in every function")); + PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, @@ -170,18 +178,28 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF)) + if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { Reserved.set(PPC::X31); + // If we need a base pointer, and we also have a frame pointer, then use + // r30 as the base pointer. 
+ if (PPCFI->needsFP(MF) && hasBasePointer(MF)) + Reserved.set(PPC::X30); + } + // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } } - if (PPCFI->needsFP(MF)) + if (PPCFI->needsFP(MF) || hasBasePointer(MF)) { Reserved.set(PPC::R31); + if (PPCFI->needsFP(MF) && hasBasePointer(MF)) + Reserved.set(PPC::R30); + } + // Reserve Altivec registers when Altivec is unavailable. if (!Subtarget.hasAltivec()) for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(), @@ -524,7 +542,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); @@ -562,12 +579,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). - - bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? - (is64Bit ? PPC::X31 : PPC::R31) : - (is64Bit ? PPC::X1 : PPC::R1), - false); + MI.getOperand(FIOperandNum).ChangeToRegister( + FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false); // Figure out if the offset in the instruction is shifted right two bits. bool isIXAddr = usesIXAddr(MI); @@ -586,8 +599,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. if (!MF.getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) - Offset += MFI->getStackSize(); + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) { + if (!(hasBasePointer(MF) && FrameIndex < 0)) + Offset += MFI->getStackSize(); + } // If we can, encode the offset directly into the instruction. If this is a // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If @@ -605,6 +620,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. + bool is64Bit = Subtarget.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; @@ -658,6 +674,49 @@ unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } +unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!hasBasePointer(MF)) + return getFrameRegister(MF); + + if (!Subtarget.isPPC64()) + return TFI->hasFP(MF) ? PPC::R30 : PPC::R31; + else + return TFI->hasFP(MF) ? PPC::X30 : PPC::X31; +} + +bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { + if (!EnableBasePointer) + return false; + if (AlwaysBasePointer) + return true; + + // If we need to realign the stack, then the stack pointer can no longer + // serve as an offset into the caller's stack space. As a result, we need a + // base pointer. 
+ return needsStackRealignment(MF); +} + +bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!MF.getTarget().Options.RealignStack) + return false; + + return true; +} + +bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + /// Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 93626a9..d02af9e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -92,6 +92,12 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; + // Base pointer (stack realignment) support. + unsigned getBaseRegister(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const; + // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll new file mode 100644 index 0000000..7bd28f6 --- /dev/null +++ b/test/CodeGen/PowerPC/stack-realign.ll @@ -0,0 +1,151 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -disable-fp-elim < %s | FileCheck -check-prefix=CHECK-FP %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s = type { i32, i32 } + +declare void @bar(i32*) + +define void @goo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 32 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) + ret void +} + +; CHECK-LABEL: @goo + +; CHECK-DAG: mflr 0 +; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59 +; CHECK-DAG: std 31, -8(1) +; CHECK-DAG: mr 31, 1 +; CHECK-DAG: std 0, 16(1) +; CHECK-DAG: subfic 0, [[REG]], -160 +; CHECK: stdux 1, 1, 0 + +; CHECK: .cfi_offset r31, -8 +; CHECK: .cfi_offset lr, 16 + +; CHECK: std 3, 48(31) + +; CHECK: ld 1, 0(1) +; CHECK-DAG: ld 0, 16(1) +; CHECK-DAG: ld 31, -8(1) +; CHECK-DAG: mtlr 0 +; CHECK: blr + +; CHECK-FP-LABEL: @goo + +; CHECK-FP-DAG: mflr 0 +; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59 +; CHECK-FP-DAG: std 31, -8(1) +; CHECK-FP-DAG: std 30, -16(1) +; CHECK-FP-DAG: mr 30, 1 +; CHECK-FP-DAG: std 0, 16(1) +; CHECK-FP-DAG: subfic 0, [[REG]], 
-160 +; CHECK-FP: stdux 1, 1, 0 + +; CHECK-FP: .cfi_offset r31, -8 +; CHECK-FP: .cfi_offset r30, -16 +; CHECK-FP: .cfi_offset lr, 16 + +; CHECK-FP: mr 31, 1 + +; CHECK-FP: std 3, 48(30) + +; CHECK-FP: ld 1, 0(1) +; CHECK-FP-DAG: ld 0, 16(1) +; CHECK-FP-DAG: ld 31, -8(1) +; CHECK-FP-DAG: ld 30, -16(1) +; CHECK-FP-DAG: mtlr 0 +; CHECK-FP: blr + +; The large-frame-size case. +define void @hoo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [200000 x i32], align 32 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) + ret void +} + +; CHECK-LABEL: @hoo + +; CHECK-DAG: lis [[REG1:[0-9]+]], -13 +; CHECK-DAG: rldicl [[REG3:[0-9]+]], 1, 0, 59 +; CHECK-DAG: mflr 0 +; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51808 +; CHECK-DAG: std 31, -8(1) +; CHECK-DAG: mr 31, 1 +; CHECK-DAG: std 0, 16(1) +; CHECK-DAG: subfc 0, [[REG3]], [[REG2]] +; CHECK: stdux 1, 1, 0 + +; CHECK: blr + +; Make sure that the FP save area is still allocated correctly relative to +; where r30 is saved. 
+define void @loo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 32 + %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %0 = load i32* %a1, align 4, !tbaa !0 + %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %1 = load i32* %b, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4, !tbaa !0 + call void @bar(i32* %arrayidx) + call void asm sideeffect "", "~{f30}"() nounwind + ret void +} + +; CHECK-LABEL: @loo + +; CHECK-DAG: mflr 0 +; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59 +; CHECK-DAG: std 31, -24(1) +; CHECK-DAG: mr 31, 1 +; CHECK-DAG: std 0, 16(1) +; CHECK-DAG: subfic 0, [[REG]], -160 +; CHECK: stdux 1, 1, 0 + +; CHECK: stfd 30, -16(31) + +; CHECK: blr + +; CHECK-FP-LABEL: @loo + +; CHECK-FP-DAG: mflr 0 +; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59 +; CHECK-FP-DAG: std 31, -24(1) +; CHECK-FP-DAG: std 30, -32(1) +; CHECK-FP-DAG: mr 30, 1 +; CHECK-FP-DAG: std 0, 16(1) +; CHECK-FP-DAG: subfic 0, [[REG]], -192 +; CHECK-FP: stdux 1, 1, 0 + +; CHECK-FP: stfd 30, -16(30) + +; CHECK-FP: blr + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} |