diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCFastISel.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCFastISel.cpp | 203 |
1 files changed, 111 insertions, 92 deletions
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 1149354..54532b5 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -15,7 +15,9 @@ #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" +#include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/Optional.h" @@ -84,18 +86,20 @@ typedef struct Address { class PPCFastISel final : public FastISel { const TargetMachine &TM; + const PPCSubtarget *PPCSubTarget; + PPCFunctionInfo *PPCFuncInfo; const TargetInstrInfo &TII; const TargetLowering &TLI; - const PPCSubtarget *PPCSubTarget; LLVMContext *Context; public: explicit PPCFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()), - TII(*TM.getSubtargetImpl()->getInstrInfo()), - TLI(*TM.getSubtargetImpl()->getTargetLowering()), - PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()), + PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()), + PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()), + TII(*PPCSubTarget->getInstrInfo()), + TLI(*PPCSubTarget->getTargetLowering()), Context(&FuncInfo.Fn->getContext()) {} // Backend specific FastISel code. @@ -119,6 +123,8 @@ class PPCFastISel final : public FastISel { unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + bool fastLowerCall(CallLoweringInfo &CLI) override; + // Instruction selection routines. private: bool SelectLoad(const Instruction *I); @@ -130,7 +136,6 @@ class PPCFastISel final : public FastISel { bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); @@ -139,6 +144,9 @@ class PPCFastISel final : public FastISel { private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); + bool isVSFRCRegister(unsigned Register) const { + return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID; + } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, @@ -171,9 +179,7 @@ class PPCFastISel final : public FastISel { CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg); - void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg); + bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); CCAssignFn *usePPC32CCs(unsigned Flag); private: @@ -482,6 +488,16 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + + // If this is a potential VSX load with an offset of 0, a VSX indexed load can + // be used. + bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg); + if (IsVSFRC && (Opc == PPC::LFD) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + if (ResultReg == 0) ResultReg = createResultReg(UseRC); @@ -489,6 +505,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { + // VSX only provides an indexed load. + if (IsVSFRC && Opc == PPC::LFD) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( @@ -501,6 +519,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Base reg with offset in range. } else if (UseOffset) { + // VSX only provides an indexed load. + if (IsVSFRC && Opc == PPC::LFD) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -524,7 +544,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LWA_32: Opc = PPC::LWAX_32; break; case PPC::LD: Opc = PPC::LDX; break; case PPC::LFS: Opc = PPC::LFSX; break; - case PPC::LFD: Opc = PPC::LFDX; break; + case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Addr.Base.Reg).addReg(IndexReg); @@ -602,10 +622,22 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { unsigned IndexReg = 0; PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg); + // If this is a potential VSX store with an offset of 0, a VSX indexed store + // can be used. + bool IsVSFRC = isVSFRCRegister(SrcReg); + if (IsVSFRC && (Opc == PPC::STFD) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + // Note: If we still have a frame index here, we know the offset is // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { + // VSX only provides an indexed store. + if (IsVSFRC && Opc == PPC::STFD) return false; + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), @@ -619,12 +651,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { .addMemOperand(MMO); // Base reg with offset in range. - } else if (UseOffset) + } else if (UseOffset) { + // VSX only provides an indexed store. + if (IsVSFRC && Opc == PPC::STFD) return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. - else { + } else { // Get the RR opcode corresponding to the RI one. FIXME: It would be // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it // is hard to get at. @@ -638,7 +673,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STW8: Opc = PPC::STWX8; break; case PPC::STD: Opc = PPC::STDX; break; case PPC::STFS: Opc = PPC::STFSX; break; - case PPC::STFD: Opc = PPC::STFDX; break; + case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); @@ -1202,9 +1237,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); // Reserve space for the linkage area on the stack. - bool isELFv2ABI = PPCSubTarget->isELFv2ABI(); - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, - isELFv2ABI); + unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, 8); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); @@ -1243,7 +1276,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Prepare to assign register arguments. Every argument uses up a // GPR protocol register even if it's passed in a floating-point - // register. + // register (unless we're using the fast calling convention). unsigned NextGPR = PPC::X3; unsigned NextFPR = PPC::F1; @@ -1293,7 +1326,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, unsigned ArgReg; if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { ArgReg = NextFPR++; - ++NextGPR; + if (CC != CallingConv::Fast) + ++NextGPR; } else ArgReg = NextGPR++; @@ -1307,9 +1341,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // For a call that we've determined we can fast-select, finish the // call sequence and generate a copy to obtain the return value (if any). -void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, - const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes, bool IsVarArg) { +bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { + CallingConv::ID CC = CLI.CallConv; + // Issue CallSEQ_END. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) @@ -1320,7 +1354,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); @@ -1365,39 +1399,35 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, } assert(ResultReg && "ResultReg unset!"); - UsedRegs.push_back(SourcePhysReg); - updateValueMap(I, ResultReg); + CLI.InRegs.push_back(SourcePhysReg); + CLI.ResultReg = ResultReg; + CLI.NumResultRegs = 1; } + + return true; } -// Attempt to fast-select a call instruction. -bool PPCFastISel::SelectCall(const Instruction *I) { - const CallInst *CI = cast<CallInst>(I); - const Value *Callee = CI->getCalledValue(); +bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { + CallingConv::ID CC = CLI.CallConv; + bool IsTailCall = CLI.IsTailCall; + bool IsVarArg = CLI.IsVarArg; + const Value *Callee = CLI.Callee; + const char *SymName = CLI.SymName; - // Can't handle inline asm. - if (isa<InlineAsm>(Callee)) + if (!Callee && !SymName) return false; // Allow SelectionDAG isel to handle tail calls. - if (CI->isTailCall()) + if (IsTailCall) return false; - // Obtain calling convention. - ImmutableCallSite CS(CI); - CallingConv::ID CC = CS.getCallingConv(); - - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - bool IsVarArg = FTy->isVarArg(); - - // Not ready for varargs yet. + // Let SDISel handle vararg functions. if (IsVarArg) return false; // Handle simple calls for now, with legal return types and // those that can be extended. - Type *RetTy = I->getType(); + Type *RetTy = CLI.RetTy; MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1418,7 +1448,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Bail early if more than 8 arguments, as we only currently // handle arguments passed in registers. - unsigned NumArgs = CS.arg_size(); + unsigned NumArgs = CLI.OutVals.size(); if (NumArgs > 8) return false; @@ -1433,28 +1463,16 @@ bool PPCFastISel::SelectCall(const Instruction *I) { ArgVTs.reserve(NumArgs); ArgFlags.reserve(NumArgs); - for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end(); - II != IE; ++II) { - // FIXME: ARM does something for intrinsic calls here, check into that. - - unsigned AttrIdx = II - CS.arg_begin() + 1; - + for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { // Only handle easy calls for now. It would be reasonably easy // to handle <= 8-byte structures passed ByVal in registers, but we // have to ensure they are right-justified in the register. - if (CS.paramHasAttr(AttrIdx, Attribute::InReg) || - CS.paramHasAttr(AttrIdx, Attribute::StructRet) || - CS.paramHasAttr(AttrIdx, Attribute::Nest) || - CS.paramHasAttr(AttrIdx, Attribute::ByVal)) + ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; + if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) return false; - ISD::ArgFlagsTy Flags; - if (CS.paramHasAttr(AttrIdx, Attribute::SExt)) - Flags.setSExt(); - if (CS.paramHasAttr(AttrIdx, Attribute::ZExt)) - Flags.setZExt(); - - Type *ArgTy = (*II)->getType(); + Value *ArgValue = CLI.OutVals[i]; + Type *ArgTy = ArgValue->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; @@ -1462,14 +1480,11 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (ArgVT.isVector()) return false; - unsigned Arg = getRegForValue(*II); + unsigned Arg = getRegForValue(ArgValue); if (Arg == 0) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*II); + Args.push_back(ArgValue); ArgRegs.push_back(Arg); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); @@ -1483,39 +1498,46 @@ bool PPCFastISel::SelectCall(const Instruction *I) { RegArgs, CC, NumBytes, IsVarArg)) return false; + MachineInstrBuilder MIB; // FIXME: No handling for function pointers yet. This requires // implementing the function descriptor (OPD) setup. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV) - return false; - - // Build direct call with NOP for TOC restore. - // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BL8_NOP)); - // Add callee. - MIB.addGlobalAddress(GV); + if (!GV) { + // patchpoints are a special case; they always dispatch to a pointer value. + // However, we don't actually want to generate the indirect call sequence + // here (that will be generated, as necessary, during asm printing), and + // the call we generate here will be erased by FastISel::selectPatchpoint, + // so don't try very hard... + if (CLI.IsPatchPoint) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); + else + return false; + } else { + // Build direct call with NOP for TOC restore. + // FIXME: We can and should optimize away the NOP for local calls. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + } // Add implicit physical register uses to the call. for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); - // Direct calls in the ELFv2 ABI need the TOC register live into the call. - if (PPCSubTarget->isELFv2ABI()) - MIB.addReg(PPC::X2, RegState::Implicit); + // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live + // into the call. + PPCFuncInfo->setUsesTOCBasePtr(); + MIB.addReg(PPC::X2, RegState::Implicit); // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); - // Finish off the call including any return values. - SmallVector<unsigned, 4> UsedRegs; - finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg); - - // Set all unused physregs defs as dead. - static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + CLI.Call = MIB; - return true; + // Finish off the call including any return values. + return finishCall(RetVT, CLI, NumBytes); } // Attempt to fast-select a return instruction. @@ -1626,7 +1648,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { } MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(PPC::BLR)); + TII.get(PPC::BLR8)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); @@ -1805,9 +1827,7 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: - if (dyn_cast<IntrinsicInst>(I)) - return false; - return SelectCall(I); + return selectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: @@ -1846,6 +1866,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), @@ -1895,6 +1916,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { if (GV->isThreadLocal()) return 0; + PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a simple TOC load. if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), @@ -2077,7 +2099,7 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return PPCMaterializeGV(GV, VT); else if (isa<ConstantInt>(C)) - return PPCMaterializeInt(C, VT); + return PPCMaterializeInt(C, VT, VT != MVT::i1); return 0; } @@ -2280,13 +2302,10 @@ namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { - const TargetMachine &TM = FuncInfo.MF->getTarget(); - // Only available on 64-bit ELF for now. - const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); - if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) + const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>(); + if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) return new PPCFastISel(FuncInfo, LibInfo); - return nullptr; } } |