diff options
-rw-r--r-- | lib/Target/PowerPC/PPCFrameInfo.h | 34 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 75 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstr64Bit.td | 6 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 11 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCMachineFunctionInfo.h | 43 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.cpp | 376 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.h | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.td | 4 |
9 files changed, 409 insertions, 150 deletions
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 40305be..ed67496 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -35,6 +35,40 @@ public: NumEntries = 1; return &LR[0]; } + + /// getFramePointerSaveOffset - Return the previous frame offset to save the + /// frame pointer. + static unsigned getFramePointerSaveOffset(bool LP64) { + // Use the TOC save slot in the PowerPC linkage area for saving the frame + // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 + // is treated as a caller saved register.) + return LP64 ? 40 : 20; + } + + /// getLinkageSize - Return the size of the PowerPC ABI linkage area. + /// + static unsigned getLinkageSize(bool LP64) { + return 6 * (LP64 ? 8 : 4); + } + + /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI + /// argument area. + static unsigned getMinCallArgumentsSize(bool LP64) { + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + return 8 * (LP64 ? 8 : 4); + } + + /// getMinCallFrameSize - Return the minimum size a call frame can be using + /// the PowerPC ABI. + static unsigned getMinCallFrameSize(bool LP64) { + // The call frame needs to be at least big enough for linkage and 8 args. + return getLinkageSize(LP64) + getMinCallArgumentsSize(LP64); + } + }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index ae8240c..59ae3e4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" +#include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "PPCPerfectShuffle.h" #include "llvm/ADT/VectorExtras.h" @@ -169,8 +170,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -1082,11 +1083,10 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, SmallVector<SDOperand, 8> ArgValues; SDOperand Root = Op.getOperand(0); - unsigned ArgOffset = 24; - const unsigned Num_GPR_Regs = 8; - const unsigned Num_FPR_Regs = 13; - const unsigned Num_VR_Regs = 12; - unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = PtrVT == MVT::i64; + + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64); static const unsigned GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, @@ -1105,13 +1105,17 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - bool isPPC64 = PtrVT == MVT::i64; + const unsigned Num_GPR_Regs = sizeof(GPR_32)/sizeof(GPR_32[0]); + const unsigned Num_FPR_Regs = sizeof(FPR)/sizeof(FPR[0]); + const unsigned Num_VR_Regs = sizeof( VR)/sizeof( VR[0]); + + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; // Add DAG nodes to load the arguments or copy them out of registers. On - // entry to a function on PPC, the arguments start at offset 24, although the - // first ones are often in registers. + // entry to a function on PPC, the arguments start after the linkage area, + // although the first ones are often in registers. for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { SDOperand ArgVal; bool needsLoad = false; @@ -1266,7 +1270,6 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; - // args_to_use will accumulate outgoing args for the PPCISD::CALL case in // SelectExpr to use to put the arguments in the appropriate registers. @@ -1275,7 +1278,7 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) { // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = 6*PtrByteSize; + unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64); // Add up all the space actually used. for (unsigned i = 0; i != NumOps; ++i) @@ -1286,8 +1289,7 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) { // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (NumBytes < 6*PtrByteSize+8*PtrByteSize) - NumBytes = 6*PtrByteSize+8*PtrByteSize; + NumBytes = std::max(NumBytes, PPCFrameInfo::getMinCallFrameSize(isPPC64)); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass @@ -1307,8 +1309,9 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) { // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = 6*PtrByteSize; + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + static const unsigned GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1585,6 +1588,44 @@ static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); } +static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + bool IsPPC64 = Subtarget.isPPC64(); + + // Get current frame pointer save index. The users of this index will be + // primarily DYNALLOC instructions. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + int FPSI = FI->getFramePointerSaveIndex(); + + // If the frame pointer save index hasn't been defined yet. + if (!FPSI) { + // Find out what the fix offset of the frame pointer save area. + int Offset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64); + // Allocate the frame index for frame pointer save area. + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, Offset); + // Save the result. + FI->setFramePointerSaveIndex(FPSI); + } + + // Get the inputs. + SDOperand Chain = Op.getOperand(0); + SDOperand Size = Op.getOperand(1); + + // Get the corect type for pointers. + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Negate the size. + SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT, + DAG.getConstant(0, PtrVT), Size); + // Construct a node for the frame pointer save index. + SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT); + // Build a DYNALLOC node. + SDOperand Ops[3] = { Chain, NegSize, FPSIdx }; + SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); + return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3); +} + + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { @@ -2517,6 +2558,8 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); case ISD::CALL: return LowerCALL(Op, DAG); case ISD::RET: return LowerRET(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, + PPCSubTarget); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index cb4dc47..47fd07d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -60,6 +60,11 @@ namespace llvm { /// though these are usually folded into other nodes. Hi, Lo, + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an allocation on the stack. + DYNALLOC, + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 7d895d8..7c3355a 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -100,6 +100,12 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (ops G8RC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +def DYNALLOC8 : Pseudo<(ops G8RC:$result, G8RC:$negsize, memri:$fpsi), + "${:comment} DYNALLOC8 $result, $negsize, $fpsi", + [(set G8RC:$result, + (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>, + Imp<[X1],[X1]>; + def MTLR8 : XFXForm_7_ext<31, 467, 8, (ops G8RC:$rS), "mtlr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; def MFLR8 : XFXForm_1_ext<31, 339, 8, (ops G8RC:$rT), "mflr $rT", SprMFSPR>, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 14e6e29..d55a07f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -100,6 +100,10 @@ def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain]>; def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain]>; +// Instructions to support dynamic alloca. +def SDTDynOp : SDTypeProfile<1, 2, []>; +def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. // @@ -297,6 +301,13 @@ def ADJCALLSTACKUP : Pseudo<(ops u16imm:$amt), def UPDATE_VRSAVE : Pseudo<(ops GPRC:$rD, GPRC:$rS), "UPDATE_VRSAVE $rD, $rS", []>; } + +def DYNALLOC : Pseudo<(ops GPRC:$result, GPRC:$negsize, memri:$fpsi), + "${:comment} DYNALLOC $result, $negsize, $fpsi", + [(set GPRC:$result, + (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>, + Imp<[R1],[R1]>; + def IMPLICIT_DEF_GPRC: Pseudo<(ops GPRC:$rD),"${:comment}IMPLICIT_DEF_GPRC $rD", [(set GPRC:$rD, (undef))]>; def IMPLICIT_DEF_F8 : Pseudo<(ops F8RC:$rD), "${:comment} IMPLICIT_DEF_F8 $rD", diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h new file mode 100644 index 0000000..28f9701 --- /dev/null +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -0,0 +1,43 @@ +//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by James M. Laskey and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PowerPC specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC_MACHINE_FUNCTION_INFO_H +#define PPC_MACHINE_FUNCTION_INFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PPCFunctionInfo - This class is derived from MachineFunction private +/// PowerPC target-specific information for each MachineFunction. +class PPCFunctionInfo : public MachineFunctionInfo { +private: + /// FramePointerSaveIndex - Frame index of where the old frame pointer is + /// stored. Also used as an anchor for instructions that need to be altered + /// when using frame pointers (dyna_add, dyna_sub.) + int FramePointerSaveIndex; + +public: + PPCFunctionInfo(MachineFunction& MF) + : FramePointerSaveIndex(0) + {} + + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + +}; + +} // end of namespace llvm + + +#endif
\ No newline at end of file diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 5526a6e..b9a4455 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -14,7 +14,9 @@ #define DEBUG_TYPE "reginfo" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCMachineFunctionInfo.h" #include "PPCRegisterInfo.h" +#include "PPCFrameInfo.h" #include "PPCSubtarget.h" #include "llvm/Constants.h" #include "llvm/Type.h" @@ -232,7 +234,7 @@ void PPCRegisterInfo::copyRegToReg(MachineBasicBlock &MBB, const unsigned* PPCRegisterInfo::getCalleeSaveRegs() const { // 32-bit Darwin calling convention. static const unsigned Darwin32_CalleeSaveRegs[] = { - PPC::R1 , PPC::R13, PPC::R14, PPC::R15, + PPC::R13, PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, PPC::R24, PPC::R25, PPC::R26, PPC::R27, @@ -253,7 +255,7 @@ const unsigned* PPCRegisterInfo::getCalleeSaveRegs() const { }; // 64-bit Darwin calling convention. static const unsigned Darwin64_CalleeSaveRegs[] = { - PPC::X1 , PPC::X13, PPC::X14, PPC::X15, + PPC::X13, PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, PPC::X24, PPC::X25, PPC::X26, PPC::X27, @@ -281,7 +283,7 @@ const TargetRegisterClass* const* PPCRegisterInfo::getCalleeSaveRegClasses() const { // 32-bit Darwin calling convention. static const TargetRegisterClass * const Darwin32_CalleeSaveRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -304,7 +306,7 @@ PPCRegisterInfo::getCalleeSaveRegClasses() const { // 64-bit Darwin calling convention. static const TargetRegisterClass * const Darwin64_CalleeSaveRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, @@ -388,81 +390,150 @@ MachineInstr *PPCRegisterInfo::foldMemoryOperand(MachineInstr *MI, // Stack Frame Processing methods //===----------------------------------------------------------------------===// -// hasFP - Return true if the specified function should have a dedicated frame +// needsFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. // -static bool hasFP(const MachineFunction &MF) { +static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // If frame pointers are forced, or if there are variable sized stack objects, - // use a frame pointer. - // return NoFramePointerElim || MFI->hasVarSizedObjects(); } +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. This is true if the function needs a frame pointer and has +// a non-zero stack size. +static bool hasFP(const MachineFunction &MF) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getStackSize() && needsFP(MF); +} + void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (hasFP(MF)) { - // If we have a frame pointer, convert as follows: - // ADJCALLSTACKDOWN -> lwz r0, 0(r31) - // stwu, r0, -amount(r1) - // ADJCALLSTACKUP -> addi, r1, r1, amount - MachineInstr *Old = I; - unsigned Amount = Old->getOperand(0).getImmedValue(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... - if (Old->getOpcode() == PPC::ADJCALLSTACKDOWN) { - if (!Subtarget.isPPC64()) { - BuildMI(MBB, I, PPC::LWZ, 2, PPC::R0).addImm(0).addReg(PPC::R31); - BuildMI(MBB, I, PPC::STWU, 3, PPC::R1) - .addReg(PPC::R0).addImm(-Amount).addReg(PPC::R1); - } else { - BuildMI(MBB, I, PPC::LD, 2, PPC::X0).addImm(0).addReg(PPC::X31); - BuildMI(MBB, I, PPC::STDU, 3, PPC::X1) - .addReg(PPC::X0).addImm(-Amount/4).addReg(PPC::X1); - } - } else { - assert(Old->getOpcode() == PPC::ADJCALLSTACKUP); - BuildMI(MBB, I, PPC::ADDI, 2, PPC::R1).addReg(PPC::R1).addImm(Amount); - } - } - } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. MBB.erase(I); } +/// LowerDynamicAlloc - Generate the code for allocating an object in the +/// current frame. The sequence of code with be in the general form +/// +/// addi R0, SP, #frameSize ; get the address of the previous frame +/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size +/// addi Rnew, SP, #maxCalFrameSize ; get the top of the allocation +/// +void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { + // Get the instruction. + MachineInstr &MI = *II; + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. + MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Determine whether 64-bit pointers are used. + bool LP64 = Subtarget.isPPC64(); + + // Determine the maximum call stack size. maxCallFrameSize may be + // less than the minimum. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + unsigned getMinCallFrameSize = + PPCFrameInfo::getMinCallFrameSize(LP64); + maxCallFrameSize = std::max(maxCallFrameSize, getMinCallFrameSize); + // Get the total frame size. + unsigned FrameSize = MFI->getStackSize(); + + // Get stack alignments. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Determine the previous frame's address. If FrameSize can't be + // represented as 16 bits or we need special alignment, then we load the + // previous frame's address from 0(SP). Why not do an addis of the hi? + // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. + // Constructing the constant and adding would take 3 instructions. + // Fortunately, a frame greater than 32K is rare. + if (MaxAlign < TargetAlign && isInt16(FrameSize)) { + BuildMI(MBB, II, PPC::ADDI, 2, PPC::R0) + .addReg(PPC::R31) + .addImm(FrameSize); + } else if (LP64) { + BuildMI(MBB, II, PPC::LD, 2, PPC::X0) + .addImm(0) + .addReg(PPC::X1); + } else { + BuildMI(MBB, II, PPC::LWZ, 2, PPC::R0) + .addImm(0) + .addReg(PPC::R1); + } + + // Grow the stack and update the stack pointer link, then + // determine the address of new allocated space. + if (LP64) { + BuildMI(MBB, II, PPC::STDUX, 3) + .addReg(PPC::X0) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); + BuildMI(MBB, II, PPC::ADDI8, 2, MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize); + } else { + BuildMI(MBB, II, PPC::STWUX, 3) + .addReg(PPC::R0) + .addReg(PPC::R1) + .addReg(MI.getOperand(1).getReg()); + BuildMI(MBB, II, PPC::ADDI, 2, MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize); + } + + // Discard the DYNALLOC instruction. + MBB.erase(II); +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const { - unsigned i = 0; + // Get the instruction. MachineInstr &MI = *II; + // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Find out which operand is the frame index. + unsigned i = 0; while (!MI.getOperand(i).isFrameIndex()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - + // Take into account whether it's an add or mem instruction + unsigned OffIdx = (i == 2) ? 1 : 2; + // Get the frame index. int FrameIndex = MI.getOperand(i).getFrameIndex(); + + // Get the frame pointer save index. Users of this index are primarily + // DYNALLOC instructions. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + int FPSI = FI->getFramePointerSaveIndex(); + // Get the instruction opcode. + unsigned OpC = MI.getOpcode(); + + // Special case for dynamic alloca. + if (FPSI && FrameIndex == FPSI && + (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { + lowerDynamicAlloc(II); + return; + } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). MI.getOperand(i).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1, false); - // Take into account whether it's an add or mem instruction - unsigned OffIdx = (i == 2) ? 1 : 2; - // Figure out if the offset in the instruction is shifted right two bits. This // is true for instructions like "STD", which the machine implicitly adds two // low zeros to. bool isIXAddr = false; - switch (MI.getOpcode()) { + switch (OpC) { case PPC::LWA: case PPC::LD: case PPC::STD: @@ -471,9 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const { break; } - // Now add the frame object offset to the offset from r1. - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) Offset += MI.getOperand(OffIdx).getImmedValue(); @@ -483,20 +553,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const { // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - Offset += MF.getFrameInfo()->getStackSize(); + Offset += MFI->getStackSize(); - if (Offset > 32767 || Offset < -32768) { + if (!isInt16(Offset)) { // Insert a set of r0 with the full offset value before the ld, st, or add - MachineBasicBlock *MBB = MI.getParent(); - BuildMI(*MBB, II, PPC::LIS, 1, PPC::R0).addImm(Offset >> 16); - BuildMI(*MBB, II, PPC::ORI, 2, PPC::R0).addReg(PPC::R0).addImm(Offset); + BuildMI(MBB, II, PPC::LIS, 1, PPC::R0).addImm(Offset >> 16); + BuildMI(MBB, II, PPC::ORI, 2, PPC::R0).addReg(PPC::R0).addImm(Offset); // convert into indexed form of the instruction // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0 // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 - assert(ImmToIdxMap.count(MI.getOpcode()) && + assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); - unsigned NewOpcode = ImmToIdxMap.find(MI.getOpcode())->second; + unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; MI.setOpcode(NewOpcode); MI.getOperand(1).ChangeToRegister(MI.getOperand(i).getReg(), false); MI.getOperand(2).ChangeToRegister(PPC::R0, false); @@ -615,6 +684,59 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const bool *UsedRegs) { MI->eraseFromParent(); } +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned Align = std::max(TargetAlign, MaxAlign); + assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); + unsigned AlignMask = Align - 1; // + + // If we are a leaf function, and use up to 224 bytes of stack space, + // don't have a frame pointer, calls, or dynamic alloca then we do not need + // to adjust the stack pointer (we fit in the Red Zone). + if (FrameSize <= 224 && // Fits in red zone. + !needsFP(MF) && // Frame pointer can be eliminated. + !MFI->hasCalls() && // No calls. + MaxAlign <= TargetAlign) { // No special alignment. + // No need for frame + MFI->setStackSize(0); + return; + } + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // Maximum call frame needs to be at least big enough for linkage and 8 args. + unsigned minCallFrameSize = + PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64()); + maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -622,9 +744,6 @@ void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineDebugInfo *DebugInfo = MFI->getMachineDebugInfo(); - // Do we have a frame pointer for this function? - bool HasFP = hasFP(MF); - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { @@ -637,83 +756,75 @@ void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Move MBBI back to the beginning of the function. MBBI = MBB.begin(); - // Get the number of bytes to allocate from the FrameInfo - unsigned NumBytes = MFI->getStackSize(); + // Work out frame sizes. + determineFrameLayout(MF); + unsigned FrameSize = MFI->getStackSize(); - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); + // Skip if a leaf routine. + if (!FrameSize) return; + + int NegFrameSize = -FrameSize; - // If we have calls, we cannot use the red zone to store callee save registers - // and we must set up a stack frame, so calculate the necessary size here. - if (MFI->hasCalls()) { - // We reserve argument space for call sites in the function immediately on - // entry to the current function. This eliminates the need for add/sub - // brackets around call sites. - NumBytes += MFI->getMaxCallFrameSize(); - } + // Do we have a frame pointer for this function? + bool HasFP = hasFP(MF); - // If we are a leaf function, and use up to 224 bytes of stack space, - // and don't have a frame pointer, then we do not need to adjust the stack - // pointer (we fit in the Red Zone). - if ((NumBytes == 0) || (NumBytes <= 224 && !HasFP && !MFI->hasCalls() && - MaxAlign <= TargetAlign)) { - MFI->setStackSize(0); - return; + // If there is a frame pointer, copy R31 into TOC(SP) + if (HasFP) { + int Offset = PPCFrameInfo::getFramePointerSaveOffset(Subtarget.isPPC64()); + + if (!Subtarget.isPPC64()) { + BuildMI(MBB, MBBI, PPC::STW, 3) + .addReg(PPC::R31).addImm(Offset).addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, PPC::STD, 3) + .addReg(PPC::X31).addImm(Offset/4).addReg(PPC::X1); + } } + + // Get stack alignments. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); - // Add the size of R1 to NumBytes size for the store of R1 to the bottom - // of the stack and round the size to a multiple of the alignment. - unsigned Align = std::max(TargetAlign, MaxAlign); - unsigned GPRSize = Subtarget.isPPC64() ? 8 : 4; - unsigned Size = HasFP ? GPRSize + GPRSize : GPRSize; - NumBytes = (NumBytes+Size+Align-1)/Align*Align; - - // Update frame info to pretend that this is part of the stack... - MFI->setStackSize(NumBytes); - int NegNumbytes = -NumBytes; - - // Adjust stack pointer: r1 -= numbytes. + // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now if (!Subtarget.isPPC64()) { // PPC32. if (MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign) && MaxAlign < 32767&&"Invalid alignment!"); - assert(isInt16(0-NumBytes) && "Unhandled stack size and alignment!"); + assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); + assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, PPC::RLWINM, 4, PPC::R0) .addReg(PPC::R1).addImm(0).addImm(32-Log2_32(MaxAlign)).addImm(31); BuildMI(MBB, MBBI, PPC::SUBFIC,2,PPC::R0).addReg(PPC::R0) - .addImm(0-NumBytes); + .addImm(NegFrameSize); BuildMI(MBB, MBBI, PPC::STWUX, 3) .addReg(PPC::R1).addReg(PPC::R1).addReg(PPC::R0); - } else if (NumBytes <= 32768) { + } else if (isInt16(NegFrameSize)) { BuildMI(MBB, MBBI, PPC::STWU, 3, - PPC::R1).addReg(PPC::R1).addImm(NegNumbytes).addReg(PPC::R1); + PPC::R1).addReg(PPC::R1).addImm(NegFrameSize).addReg(PPC::R1); } else { - BuildMI(MBB, MBBI, PPC::LIS, 1, PPC::R0).addImm(NegNumbytes >> 16); + BuildMI(MBB, MBBI, PPC::LIS, 1, PPC::R0).addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, PPC::ORI, 2, PPC::R0).addReg(PPC::R0) - .addImm(NegNumbytes & 0xFFFF); + .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, PPC::STWUX, 3).addReg(PPC::R1).addReg(PPC::R1) .addReg(PPC::R0); } } else { // PPC64. if (MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign) && MaxAlign < 32767&&"Invalid alignment!"); - assert(isInt16(0-NumBytes) && "Unhandled stack size and alignment!"); + assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); + assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, PPC::RLDICL, 3, PPC::X0) .addReg(PPC::X1).addImm(0).addImm(64-Log2_32(MaxAlign)); BuildMI(MBB, MBBI, PPC::SUBFIC8, 2, PPC::X0).addReg(PPC::X0) - .addImm(0-NumBytes); + .addImm(NegFrameSize); BuildMI(MBB, MBBI, PPC::STDUX, 3) .addReg(PPC::X1).addReg(PPC::X1).addReg(PPC::X0); - } else if (NumBytes <= 32768*4) { + } else if (isInt16(NegFrameSize/4)) { BuildMI(MBB, MBBI, PPC::STDU, 3, PPC::X1) - .addReg(PPC::X1).addImm(NegNumbytes/4).addReg(PPC::X1); + .addReg(PPC::X1).addImm(NegFrameSize/4).addReg(PPC::X1); } else { - BuildMI(MBB, MBBI, PPC::LIS8, 1, PPC::X0).addImm(NegNumbytes >> 16); + BuildMI(MBB, MBBI, PPC::LIS8, 1, PPC::X0).addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, PPC::ORI8, 2, PPC::X0).addReg(PPC::X0) - .addImm(NegNumbytes & 0xFFFF); + .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, PPC::STDUX, 3).addReg(PPC::X1).addReg(PPC::X1) .addReg(PPC::X0); } @@ -728,7 +839,7 @@ void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Show update of SP. MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, NegNumbytes); + MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); Moves.push_back(new MachineMove(LabelID, SPDst, SPSrc)); // Add callee saved registers to move list. @@ -740,16 +851,12 @@ void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { Moves.push_back(new MachineMove(LabelID, CSDst, CSSrc)); } } - - // If there is a frame pointer, copy R1 (SP) into R31 (FP) + + // If there is a frame pointer, copy R1 into R31 if (HasFP) { if (!Subtarget.isPPC64()) { - BuildMI(MBB, MBBI, PPC::STW, 3) - .addReg(PPC::R31).addImm(GPRSize).addReg(PPC::R1); BuildMI(MBB, MBBI, PPC::OR, 2, PPC::R31).addReg(PPC::R1).addReg(PPC::R1); } else { - BuildMI(MBB, MBBI, PPC::STD, 3) - .addReg(PPC::X31).addImm(GPRSize/4).addReg(PPC::X1); BuildMI(MBB, MBBI, PPC::OR8, 2, PPC::X31).addReg(PPC::X1).addReg(PPC::X1); } } @@ -764,41 +871,46 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Get alignment info so we know how to restore r1 const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); // Get the number of bytes allocated from the FrameInfo. - unsigned NumBytes = MFI->getStackSize(); - unsigned GPRSize = Subtarget.isPPC64() ? 8 : 4; + unsigned FrameSize = MFI->getStackSize(); - if (NumBytes != 0) { - // If this function has a frame pointer, load the saved stack pointer from - // its stack slot. - if (hasFP(MF)) { - if (!Subtarget.isPPC64()) { - BuildMI(MBB, MBBI, PPC::LWZ, 2, PPC::R31) - .addImm(GPRSize).addReg(PPC::R31); - } else { - BuildMI(MBB, MBBI, PPC::LD, 2, PPC::X31) - .addImm(GPRSize/4).addReg(PPC::X31); - } - } - + if (FrameSize != 0) { // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. if (!Subtarget.isPPC64()) { - if (NumBytes < 32768 && TargetAlign >= MFI->getMaxAlignment()) { + if (isInt16(FrameSize) && TargetAlign >= MaxAlign && + !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, PPC::ADDI, 2, PPC::R1) - .addReg(PPC::R1).addImm(NumBytes); + .addReg(PPC::R1).addImm(FrameSize); } else { BuildMI(MBB, MBBI, PPC::LWZ, 2, PPC::R1).addImm(0).addReg(PPC::R1); } } else { - if (NumBytes < 32768 && TargetAlign >= MFI->getMaxAlignment()) { + if (isInt16(FrameSize) && TargetAlign >= MaxAlign && + !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, PPC::ADDI8, 2, PPC::X1) - .addReg(PPC::X1).addImm(NumBytes); + .addReg(PPC::X1).addImm(FrameSize); } else { BuildMI(MBB, MBBI, PPC::LD, 2, PPC::X1).addImm(0).addReg(PPC::X1); } } + + // If this function has a frame pointer, load the saved frame pointer from + // its stack slot. + if (hasFP(MF)) { + int Offset = PPCFrameInfo::getFramePointerSaveOffset(Subtarget.isPPC64()); + + if (!Subtarget.isPPC64()) { + BuildMI(MBB, MBBI, PPC::LWZ, 2, PPC::R31) + .addImm(Offset).addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, PPC::LD, 2, PPC::X31) + .addImm(Offset/4).addReg(PPC::X1); + } + } + } } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 08af922..4dd10ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -61,8 +61,13 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void eliminateFrameIndex(MachineBasicBlock::iterator II) const; + /// determineFrameLayout - Determine the size of the frame and maximum call + /// frame size. + void determineFrameLayout(MachineFunction &MF) const; + void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 439650d..7f56688 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -224,7 +224,7 @@ def GPRC : RegisterClass<"PPC", [i32], 32, } GPRCClass::iterator GPRCClass::allocation_order_end(const MachineFunction &MF) const { - if (hasFP(MF)) + if (needsFP(MF)) return end()-4; // don't allocate R31, R0, R1, LR else return end()-3; // don't allocate R0, R1, LR @@ -247,7 +247,7 @@ def G8RC : RegisterClass<"PPC", [i64], 64, } G8RCClass::iterator G8RCClass::allocation_order_end(const MachineFunction &MF) const { - if (hasFP(MF)) + if (needsFP(MF)) return end()-4; else return end()-3; |