diff options
Diffstat (limited to 'lib/Target/PowerPC')
40 files changed, 1136 insertions, 805 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index bf00e73..99a1633 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -271,9 +271,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), MII(_MII) { + PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), MII(MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -425,7 +425,9 @@ public: bool isToken() const override { return Kind == Token; } bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } + bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 0ed0723..a9f5fc7 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, CRRegs); } +static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index c287fbe..311a4f2 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 1 && "Invalid u1imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); @@ -221,6 +228,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 8 && "Invalid u3imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 6ead19b..8718743 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -43,7 +43,9 @@ public: void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 2b4f2d8..d8fab5b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { void PPCELFMCAsmInfo::anchor() { } PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { + // FIXME: This is not always needed. For example, it is not needed in the + // v2 abi. + NeedsLocalForSize = true; + if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 06d380e..b9f0afb 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -39,10 +40,10 @@ class PPCMCCodeEmitter : public MCCodeEmitter { bool IsLittleEndian; public: - PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle) - : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) { - } - + PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), CTX(ctx), + IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} + ~PPCMCCodeEmitter() {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, @@ -158,14 +159,11 @@ public: }; } // end anonymous namespace - + MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx) { - Triple TT(STI.getTargetTriple()); - bool IsLittleEndian = TT.getArch() == Triple::ppc64le; - return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian); + return new PPCMCCodeEmitter(MCII, Ctx); } unsigned PPCMCCodeEmitter:: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index f0a6bb9..1c840d9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -36,9 +36,8 @@ private: int64_t EvaluateAsInt64(int64_t Value) const; - explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr, - bool _IsDarwin) - : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {} + explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin) + : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {} public: /// @name Construction diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f2da389..2f7a768 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -145,6 +145,7 @@ public: } void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation + Streamer.EmitValueToAlignment(8); Streamer.EmitSymbolValue(&S, 8); } void emitMachine(StringRef CPU) override { @@ -222,32 +223,19 @@ public: }; } -// This is duplicated code. Refactor this. -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - if (Triple(TT).isOSDarwin()) { - MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); - new PPCTargetMachOStreamer(*S); - return S; - } - - MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); - new PPCTargetELFStreamer(*S); - return S; +static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new PPCTargetAsmStreamer(S, OS); } -static MCStreamer * -createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst) { - - MCStreamer *S = llvm::createAsmStreamer( - Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); - new PPCTargetAsmStreamer(*S, OS); - return S; +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + Triple TT(STI.getTargetTriple()); + if (TT.getObjectFormat() == Triple::ELF) + return new PPCTargetELFStreamer(S); + return new PPCTargetMachOStreamer(S); } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, @@ -261,60 +249,36 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T, } extern "C" void LLVMInitializePowerPCTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo); - RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo); - RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget, - createPPCMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget, - createPPCMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, - createPPCMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, - createPPCMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget, - createPPCMCSubtargetInfo); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget, - createPPCMCCodeEmitter); - + for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) { + // Register the MC asm info. + RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter); + // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend); - TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend); - TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer); - - // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget, - createPPCMCInstPrinter); + TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend); + + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter); + } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 68f7f7a..8b1e3b4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -34,10 +34,9 @@ class raw_ostream; extern Target ThePPC32Target; extern Target ThePPC64Target; extern Target ThePPC64LETarget; - + MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f53add5..f175f6d 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -18,7 +18,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // PowerPC Subtarget features. // - + //===----------------------------------------------------------------------===// // CPU Directives // //===----------------------------------------------------------------------===// @@ -112,14 +112,21 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; +def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", + "Enable POWER8 Crypto instructions", + [FeatureP8Altivec]>; def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; - +def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", + "HasPartwordAtomics", "true", + "Enable l[bh]arx and st[bh]cx.">; def FeatureInvariantFunctionDescriptors : SubtargetFeature<"invariant-function-descriptors", "HasInvariantFunctionDescriptors", "true", "Assume function descriptors are invariant">; +def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", + "Enable Hardware Transactional Memory instructions">; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -256,11 +263,11 @@ def ProcessorFeatures { [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - DeprecatedMFTB, DeprecatedDST]; + FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; } def : ProcessorModel<"970", G5Model, @@ -339,7 +346,7 @@ def : ProcessorModel<"pwr7", P7Model, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, + Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1327290..cd60906 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -69,12 +69,11 @@ namespace { protected: MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget *Subtarget; - uint64_t TOCLabelID; StackMaps SM; public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)), TOCLabelID(0), SM(*this) {} + : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} const char *getPassName() const override { return "PowerPC Assembly Printer"; @@ -321,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - const DataLayout *DL = TM.getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; - - // To avoid name clash check if the name already exists. - while (!TOCEntry) { - if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + - "C" + Twine(TOCLabelID++)) == nullptr) { - TOCEntry = GetTempSymbol("C", TOCLabelID); - } - } - + if (!TOCEntry) + TOCEntry = createTempSymbol("C"); return TOCEntry; } @@ -1068,8 +1059,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.SwitchSection(Section); OutStreamer.EmitLabel(CurrentFnSym); OutStreamer.EmitValueToAlignment(8); - MCSymbol *Symbol1 = - OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); + MCSymbol *Symbol1 = CurrentFnSymForSize; // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), @@ -1082,11 +1072,6 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current.first, Current.second); - - MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( - ".L." + Twine(CurrentFnSym->getName())); - OutStreamer.EmitLabel(RealFnSym); - CurrentFnSymForSize = RealFnSym; } diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5af8aab..c595f44 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -171,8 +171,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; @@ -533,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // selected branch. MadeChange = true; - SCEVExpander SCEVE(*SE, "loopcnt"); + SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 54532b5..fbd7b6d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -675,8 +675,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STFS: Opc = PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) - .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg); + + // If we have an index register defined we use it in the store inst, + // otherwise we use X0 as base as it makes the vector instructions to + // use zero in the computation of the effective address regardless the + // content of the register. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; @@ -1532,7 +1542,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b10e854..3ac8e94 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -186,20 +186,34 @@ namespace { /// register can be improved, but it is wrong to substitute Reg+Reg for /// Reg in an asm, because the load or store opcode would have to change. bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override { - // We need to make sure that this one operand does not end up in r0 - // (because we might end up lowering this as 0(%op)). - const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); - const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); - SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); - SDValue NewOp = - SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - SDLoc(Op), Op.getValueType(), - Op, RC), 0); - - OutOps.push_back(NewOp); - return false; + + switch(ConstraintID) { + default: + errs() << "ConstraintID: " << ConstraintID << "\n"; + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_es: + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_o: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_Z: + case InlineAsm::Constraint_Zy: + // We need to make sure that this one operand does not end up in r0 + // (because we might end up lowering this as 0(%op)). + const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + SDLoc(Op), Op.getValueType(), + Op, RC), 0); + + OutOps.push_back(NewOp); + return false; + } + return true; } void InsertVRSaveCode(MachineFunction &MF); @@ -2105,7 +2119,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, -// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; @@ -2184,6 +2198,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPEQUH; else if (VecVT == MVT::v4i32) return PPC::VCMPEQUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPEQUD; break; case ISD::SETGT: if (VecVT == MVT::v16i8) @@ -2192,6 +2208,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTSH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTSD; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) @@ -2200,6 +2218,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, return PPC::VCMPGTUH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTUW; + else if (VecVT == MVT::v2i64) + return PPC::VCMPGTUD; break; default: break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 147e94b..871531e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - setOperationAction(ISD::MUL, MVT::v4i32, Custom); + + if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::MUL, MVT::v4i32, Legal); + else + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); - // VSX v2i64 only supports non-arithmetic operations. - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SUB, MVT::v2i64, Expand); + if (Subtarget.hasP8Altivec()) { + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SRA, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - setOperationAction(ISD::SRA, MVT::v2i64, Expand); - setOperationAction(ISD::SRL, MVT::v2i64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); + } + else { + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); @@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; + } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { + // The A2 also benefits from (very) aggressive inlining of memcpy and + // friends. The overhead of a the function call, even when warm, can be + // over one hundred cycles. + MaxStoresPerMemset = 128; + MaxStoresPerMemcpy = 128; + MaxStoresPerMemmove = 128; } } @@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LARX: return "PPCISD::LARX"; - case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; @@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); - // If the element value is larger than the splat value, cut it in half and - // check to see if the two halves are equal. Continue doing this until we - // get to ByteSize. This allows us to handle 0x01010101 as 0x01. - while (ValSizeInBytes > ByteSize) { - ValSizeInBytes >>= 1; - - // If the top half equals the bottom half, we're still ok. - if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != - (Value & ((1 << (8*ValSizeInBytes))-1))) - return SDValue(); - } + // If the element value is larger than the splat value, check if it consists + // of a repeated bit pattern of size ByteSize. + if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) + return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); @@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -/// GetFPR - Get the set of FP registers that should be allocated for arguments, +/// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. -static const MCPhysReg *GetFPR() { - static const MCPhysReg FPR[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 - }; +static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, + PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, + PPC::F11, PPC::F12, PPC::F13}; - return FPR; -} - -/// GetQFPR - Get the set of QPX registers that should be allocated for -/// arguments. -static const MCPhysReg *GetQFPR() { - static const MCPhysReg QFPR[] = { - PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, - PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13 - }; - - return QFPR; -} +/// QFPR - The set of QPX registers that should be allocated for arguments. +static const MCPhysReg QFPR[] = { + PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. @@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; - static const MCPhysReg *QFPR = GetQFPR(); - const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); @@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; - static const MCPhysReg *QFPR = GetQFPR(); - const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); @@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, SDLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); - static const EVT VTys[] = { // canonical VT to use for each size. + static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; @@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, /// altivec comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot) { + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 1; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtud_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 1; + } + else + return false; + break; + // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 0; + } + else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtud: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 0; + } + else + return false; + + break; } return true; } @@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) + if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. @@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - bool is64bit, unsigned BinOpcode) const { + unsigned AtomicSize, + unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch (AtomicSize) { + default: + llvm_unreachable("Unexpected size of atomic entity"); + case 1: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 2: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 4: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case 8: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } + const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = BB; @@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : - RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass + RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: @@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); @@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { + // If we support part-word atomic mnemonics, just use them + if (Subtarget.hasPartwordAtomics()) + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the @@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) - BB = EmitAtomicBinary(MI, BB, false, 0); + BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) - BB = EmitAtomicBinary(MI, BB, true, 0); + BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch(MI->getOpcode()) { + default: + llvm_unreachable("Compare and swap of unknown size"); + case PPC::ATOMIC_CMP_SWAP_I8: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I16: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I32: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case PPC::ATOMIC_CMP_SWAP_I64: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); @@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(loop1MBB); // loop1MBB: - // l[wd]arx dest, ptr + // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: - // st[wd]cx. newval, ptr + // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: - // st[wd]cx. dest, ptr + // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); @@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); @@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(exitMBB); BB = midMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); @@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); + } else if (MI->getOpcode() == PPC::TCHECK_RET) { + DebugLoc Dl = MI->getDebugLoc(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); + return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: + case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: + case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: + case Intrinsic::ppc_altivec_vcmpgtud_p: KnownZero = ~1U; // All bits but the low one are known to be zero. break; } @@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. + if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + if (Subtarget.isPPC64()) { return MVT::i64; - } else { - return MVT::i32; } + + return MVT::i32; } /// \brief Returns true if it is beneficial to convert a load of a constant diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 04afe88..8afd7ef 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -166,14 +166,6 @@ namespace llvm { /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. MFFS, - /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and - /// reserve indexed. This is used to implement atomic operations. - LARX, - - /// STCX = This corresponds to PPC stcx. instrcution: store conditional - /// indexed. This is used to implement atomic operations. - STCX, - /// TC_RETURN - A tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) @@ -489,7 +481,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, - MachineBasicBlock *MBB, bool is64Bit, + MachineBasicBlock *MBB, + unsigned AtomicSize, unsigned BinOpcode) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, @@ -526,6 +519,21 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode == "es") + return InlineAsm::Constraint_es; + else if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + else if (ConstraintCode == "Z") + return InlineAsm::Constraint_Z; + else if (ConstraintCode == "Zy") + return InlineAsm::Constraint_Zy; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 69c0d7d..183d088 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -235,15 +235,19 @@ let usesCustomInserter = 1 in { } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), - "ldarx $rD, $ptr", IIC_LdStLDARX, - [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; + "ldarx $rD, $ptr", IIC_LdStLDARX, []>; + +// Instruction to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT; +} -let Defs = [CR0] in +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), - "stdcx. $rS, $dst", IIC_LdStSTDCX, - [(PPCstcx i64:$rS, xoaddr:$dst)]>, - isDOT; + "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT; let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -325,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { Requires<[In64BitMode]>; } +def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", IIC_SprMFSPR>; +def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT), + "mtspr $SPR, $RT", IIC_SprMTSPR>; + + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -696,7 +706,7 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, @@ -752,7 +762,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Zero extending loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; @@ -810,7 +820,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), // Full 8-byte loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index f6acd6e..123808b 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, !strconcat(opc, " $vD, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; +class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA), + !strconcat(opc, " $vD, $vA"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA))]>; + +class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX), + !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>; + //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -342,7 +352,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; -let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. +let PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; @@ -750,7 +760,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>; def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; - + let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", IIC_VecFP, @@ -939,8 +949,50 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)), } // end HasAltivec def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; +def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">; let Predicates = [HasP8Altivec] in { +let isCommutable = 1 in { +def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw, + v2i64, v4i32>; +def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw, + v2i64, v4i32>; +def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw, + v2i64, v4i32>; +def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw, + v2i64, v4i32>; +def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuluwm $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>; +def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>; +def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>; +def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; +def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; +} // isCommutable + +// Vector shifts +def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; +def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsld $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>; +def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>; +def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrad $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>; + +// Vector Integer Arithmetic Instructions +let isCommutable = 1 in { +def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +} // isCommutable + +def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; + // Count Leading Zeros def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), "vclzb $vD, $vB", IIC_VecGeneral, @@ -992,4 +1044,42 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vorc $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (or v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; + +// i64 element comparisons. +def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>; +def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>; +def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>; +def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; +def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>; +def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>; + +// The cryptography instructions that do not require Category:Vector.Crypto +def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb", + int_ppc_altivec_crypto_vpmsumb, v16i8>; +def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh", + int_ppc_altivec_crypto_vpmsumh, v8i16>; +def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", + int_ppc_altivec_crypto_vpmsumw, v4i32>; +def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", + int_ppc_altivec_crypto_vpmsumd, v2i64>; +def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", + int_ppc_altivec_crypto_vpermxor, v16i8>; + } // end HasP8Altivec + +// Crypto instructions (from builtins) +let Predicates = [HasP8Crypto] in { +def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw", + int_ppc_altivec_crypto_vshasigmaw, v4i32>; +def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad", + int_ppc_altivec_crypto_vshasigmad, v2i64>; +def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher, + v2i64>; +def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast", + int_ppc_altivec_crypto_vcipherlast, v2i64>; +def VNCIPHER : VX1_Int_Ty<1352, "vncipher", + int_ppc_altivec_crypto_vncipher, v2i64>; +def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", + int_ppc_altivec_crypto_vncipherlast, v2i64>; +def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; +} // HasP8Crypto diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 506a2d0..b7a7a1f 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -693,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let A = 0; } +class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit R; + + bit RC = 1; + + let Inst{6-9} = 0; + let Inst{10} = R; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit A; + + bit RC = 1; + + let Inst{6} = A; + let Inst{7-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit L; + + bit RC = 0; // set by isDOT + + let Inst{7-9} = 0; + let Inst{10} = L; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + + bit RC = 0; + + let Inst{6-8} = BF; + let Inst{9-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + // XX*-Form (VSX) class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -1470,6 +1524,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr, let Inst{21-31} = xo; } +/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" +class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<1> ST; + bits<4> SIX; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16} = ST; + let Inst{17-20} = SIX; + let Inst{21-31} = xo; +} + +/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox" +class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + // E-4 VXR-Form class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td new file mode 100644 index 0000000..20e6a62 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrHTM.td @@ -0,0 +1,172 @@ +//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hardware Transactional Memory extension to the +// PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + + + +def HasHTM : Predicate<"PPCSubTarget->hasHTM()">; + +def HTM_get_imm : SDNodeXForm<imm, [{ + return getI32Imm (N->getZExtValue()); +}]>; + +let hasSideEffects = 1, usesCustomInserter = 1 in { +def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; +} + + +let Predicates = [HasHTM] in { + +def TBEGIN : XForm_htm0 <31, 654, + (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; + +def TEND : XForm_htm1 <31, 686, + (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; + +def TABORT : XForm_base_r3xo <31, 910, + (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, + []>, isDOT { + let RST = 0; + let B = 0; +} + +def TABORTWC : XForm_base_r3xo <31, 782, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTWCI : XForm_base_r3xo <31, 846, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDC : XForm_base_r3xo <31, 814, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDCI : XForm_base_r3xo <31, 878, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TSR : XForm_htm2 <31, 750, + (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, + isDOT; + +def TCHECK : XForm_htm3 <31, 718, + (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>; + + +def TRECLAIM : XForm_base_r3xo <31, 942, + (outs crrc:$ret), (ins gprc:$A), "treclaim. $A", + IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let B = 0; +} + +def TRECHKPT : XForm_base_r3xo <31, 1006, + (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let A = 0; + let B = 0; +} + +// Builtins + +// All HTM instructions, with the exception of tcheck, set CR0 with the +// value of the MSR Transaction State (TS) bits that exist before the +// instruction is executed. For tbegin., the EQ bit in CR0 can be used +// to determine whether the transaction was successfully started (0) or +// failed (1). We use an XORI pattern to 'flip' the bit to match the +// tbegin builtin API which defines a return value of 1 as success. + +def : Pat<(int_ppc_tbegin i32:$R), + (XORI + (EXTRACT_SUBREG ( + TBEGIN (HTM_get_imm imm:$R)), sub_eq), + 1)>; + +def : Pat<(int_ppc_tend i32:$R), + (TEND (HTM_get_imm imm:$R))>; + + +def : Pat<(int_ppc_tabort i32:$R), + (TABORT $R)>; + +def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB), + (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI), + (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB), + (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI), + (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tcheck), + (TCHECK_RET)>; + +def : Pat<(int_ppc_treclaim i32:$RA), + (TRECLAIM $RA)>; + +def : Pat<(int_ppc_trechkpt), + (TRECHKPT)>; + +def : Pat<(int_ppc_tsr i32:$L), + (TSR (HTM_get_imm imm:$L))>; + +def : Pat<(int_ppc_get_texasr), + (MFSPR8 130)>; + +def : Pat<(int_ppc_get_texasru), + (MFSPR8 131)>; + +def : Pat<(int_ppc_get_tfhar), + (MFSPR8 128)>; + +def : Pat<(int_ppc_get_tfiar), + (MFSPR8 129)>; + + +def : Pat<(int_ppc_set_texasr i64:$V), + (MTSPR8 130, $V)>; + +def : Pat<(int_ppc_set_texasru i64:$V), + (MTSPR8 131, $V)>; + +def : Pat<(int_ppc_set_tfhar i64:$V), + (MTSPR8 128, $V)>; + +def : Pat<(int_ppc_set_tfiar i64:$V), + (MTSPR8 129, $V)>; + + +// Extended mnemonics +def : Pat<(int_ppc_tendall), + (TEND 1)>; + +def : Pat<(int_ppc_tresume), + (TSR 1)>; + +def : Pat<(int_ppc_tsuspend), + (TSR 0)>; + +def : Pat<(i64 (int_ppc_ttest)), + (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>; + +} // [HasHTM] diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index fe9474a..c9c2949 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -61,7 +61,7 @@ void PPCInstrInfo::anchor() {} PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - Subtarget(STI), RI(STI) {} + Subtarget(STI), RI(STI.getTargetMachine()) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. @@ -113,9 +113,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineOperand &DefMO = DefMI->getOperand(DefIdx); unsigned Reg = DefMO.getReg(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); bool IsRegCR; - if (TRI->isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI->getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || @@ -697,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, .addReg(Cond[1].getReg(), 0, SubIdx); } +static unsigned getCRBitValue(unsigned CRBit) { + unsigned Ret = 4; + if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT || + CRBit == PPC::CR2LT || CRBit == PPC::CR3LT || + CRBit == PPC::CR4LT || CRBit == PPC::CR5LT || + CRBit == PPC::CR6LT || CRBit == PPC::CR7LT) + Ret = 3; + if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT || + CRBit == PPC::CR2GT || CRBit == PPC::CR3GT || + CRBit == PPC::CR4GT || CRBit == PPC::CR5GT || + CRBit == PPC::CR6GT || CRBit == PPC::CR7GT) + Ret = 2; + if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ || + CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ || + CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ || + CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ) + Ret = 1; + if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN || + CRBit == PPC::CR2UN || CRBit == PPC::CR3UN || + CRBit == PPC::CR4UN || CRBit == PPC::CR5UN || + CRBit == PPC::CR6UN || CRBit == PPC::CR7UN) + Ret = 0; + + assert(Ret != 4 && "Invalid CR bit register"); + return Ret; +} + void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -742,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SrcReg = SuperReg; } + // Different class register copy + if (PPC::CRBITRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + unsigned CRReg = getCRFromCRBit(SrcReg); + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(CRReg), getKillRegState(KillSrc); + // Rotate the CR bit in the CR fields to be the least significant bit and + // then mask with 0x1 (MB = ME = 31). + BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) + .addImm(31) + .addImm(31); + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::G8RCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg) + .addReg(SrcReg), getKillRegState(KillSrc); + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(SrcReg), getKillRegState(KillSrc); + return; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 4add6f9..7fd076a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -63,7 +63,7 @@ enum PPC970_Unit { }; } // end namespace PPCII - +class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; const PPCRegisterInfo RI; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1a045b1..5eff156 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPClarx : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; -def SDT_PPCstcx : SDTypeProfile<0, 2, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; - def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; @@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// Instructions to support atomic operations -def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, - [SDNPHasChain, SDNPMayLoad]>; -def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, - [SDNPHasChain, SDNPMayStore]>; - // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; @@ -445,6 +432,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass { def crrc : RegisterOperand<CRRC> { let ParserMatchClass = PPCRegCRRCAsmOperand; } +def crrc0 : RegisterOperand<CRRC0> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} + +def PPCU1ImmAsmOperand : AsmOperandClass { + let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let RenderMethod = "addImmOperands"; +} +def u1imm : Operand<i32> { + let PrintMethod = "printU1ImmOperand"; + let ParserMatchClass = PPCU1ImmAsmOperand; +} def PPCU2ImmAsmOperand : AsmOperandClass { let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; @@ -455,6 +454,15 @@ def u2imm : Operand<i32> { let ParserMatchClass = PPCU2ImmAsmOperand; } +def PPCU3ImmAsmOperand : AsmOperandClass { + let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let RenderMethod = "addImmOperands"; +} +def u3imm : Operand<i32> { + let PrintMethod = "printU3ImmOperand"; + let ParserMatchClass = PPCU3ImmAsmOperand; +} + def PPCU4ImmAsmOperand : AsmOperandClass { let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; let RenderMethod = "addImmOperands"; @@ -715,7 +723,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; - +def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; @@ -1446,15 +1454,44 @@ let usesCustomInserter = 1 in { } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { +def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src", IIC_LdStLWARX, []>, + Requires<[HasPartwordAtomics]>; + +def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src", IIC_LdStLWARX, []>, + Requires<[HasPartwordAtomics]>; + def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src", IIC_LdStLWARX, - [(set i32:$rD, (PPClarx xoaddr:$src))]>; + "lwarx $rD, $src", IIC_LdStLWARX, []>; + +// Instructions to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), + "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; +} + +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in { +def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst), + "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; + +def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst), + "sthcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; -let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", IIC_LdStSTWCX, - [(PPCstcx i32:$rS, xoaddr:$dst)]>, - isDOT; + "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; +} let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; @@ -1473,7 +1510,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), // // Unindexed (r+i) Loads. -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; @@ -1570,7 +1607,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // Indexed (r+r) Loads. // -let canFoldAsLoad = 1, PPC970_Unit = 2 in { +let PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; @@ -2683,6 +2720,7 @@ include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" include "PPCInstrQPX.td" +include "PPCInstrHTM.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index c984d46..5c66b42 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -501,7 +501,7 @@ let Uses = [RM] in { "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; // Load indexed instructions - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1 in { def QVLFDX : XForm_1<31, 583, (outs qfrc:$FRT), (ins memrr:$src), "qvlfdx $FRT, $src", IIC_LdStLFD, diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index d6cb3a0..ec04da4 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -66,7 +66,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects. let Uses = [RM] in { // Load indexed instructions - let mayLoad = 1, canFoldAsLoad = 1 in { + let mayLoad = 1 in { def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index efd2d92..005bcaf 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -104,7 +104,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref bool PPCLoopDataPrefetch::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); - DL = F.getParent()->getDataLayout(); + DL = &F.getParent()->getDataLayout(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); @@ -192,7 +192,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); if (const SCEVConstant *ConstPtrDiff = dyn_cast<SCEVConstant>(PtrDiff)) { - int64_t PD = abs64(ConstPtrDiff->getValue()->getSExtValue()); + int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); if (PD < (int64_t) CacheLineSize) { DupPref = true; break; @@ -211,7 +211,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); - SCEVExpander SCEVE(*SE, "prefaddr"); + SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); IRBuilder<> Builder(MemI); diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index df65227..092a4ef 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" @@ -84,7 +85,6 @@ namespace { PPCTargetMachine *TM; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; }; } @@ -141,9 +141,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; - bool MadeChange = false; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); @@ -158,9 +155,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { bool MadeChange = false; - if (!DL) - return MadeChange; - // Only prep. the inner-most loop if (!L->empty()) return MadeChange; @@ -261,6 +255,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); + Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), BasePtr->getType()->getPointerAddressSpace()); @@ -280,7 +275,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { MemI->hasName() ? MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); - SCEVExpander SCEVE(*SE, "pistart"); + SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, LoopPredecessor->getTerminator()); @@ -295,8 +290,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { } Instruction *InsPoint = Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = - GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(), + GetElementPtrInst *PtrInc = GetElementPtrInst::Create( + I8Ty, NewPHI, BasePtrIncSCEV->getValue(), MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -341,9 +336,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { PtrIP = PtrIP->getParent()->getFirstInsertionPt(); else if (!PtrIP) PtrIP = I->second; - - GetElementPtrInst *NewPtr = - GetElementPtrInst::Create(PtrInc, Diff->getValue(), + + GetElementPtrInst *NewPtr = GetElementPtrInst::Create( + I8Ty, PtrInc, Diff->getValue(), I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP); if (!PtrIP) NewPtr->insertAfter(cast<Instruction>(PtrInc)); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 819738b..0965cb3 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -184,6 +184,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: assert(!MO.getSubReg() && "Subregs should be eliminated!"); + assert(MO.getReg() > PPC::NoRegister && + MO.getReg() < PPC::NUM_TARGET_REGS && + "Invalid register for this target!"); MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index c9a9684..0e568d3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -18,6 +18,7 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -56,11 +57,11 @@ static cl::opt<bool> AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false), cl::desc("Force the use of a base pointer in every function")); -PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST) - : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, - ST.isPPC64() ? 0 : 1, - ST.isPPC64() ? 0 : 1), - Subtarget(ST) { +PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM) + : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR, + TM.isPPC64() ? 0 : 1, + TM.isPPC64() ? 0 : 1), + TM(TM) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value // when it checks for ZERO folding. if (Kind == 1) { - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return &PPC::G8RC_NOX0RegClass; return &PPC::GPRC_NOR0RegClass; } - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; } const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>(); if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) { if (Subtarget.hasVSX()) return CSR_64_AllRegs_VSX_SaveList; @@ -108,28 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_Darwin64_Altivec_SaveList : - CSR_Darwin64_SaveList) : - (Subtarget.hasAltivec() ? - CSR_Darwin32_Altivec_SaveList : - CSR_Darwin32_SaveList); + return TM.isPPC64() + ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList + : CSR_Darwin64_SaveList) + : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList + : CSR_Darwin32_SaveList); // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList : - CSR_SVR464_Altivec_SaveList) : - (SaveR2 ? CSR_SVR464_R2_SaveList : - CSR_SVR464_SaveList)) : - (Subtarget.hasAltivec() ? - CSR_SVR432_Altivec_SaveList : - CSR_SVR432_SaveList); + return TM.isPPC64() + ? (Subtarget.hasAltivec() + ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList + : CSR_SVR464_Altivec_SaveList) + : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList)) + : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList + : CSR_SVR432_SaveList); } -const uint32_t* -PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +const uint32_t * +PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (CC == CallingConv::AnyReg) { if (Subtarget.hasVSX()) return CSR_64_AllRegs_VSX_RegMask; @@ -139,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_Darwin64_Altivec_RegMask : - CSR_Darwin64_RegMask) : - (Subtarget.hasAltivec() ? - CSR_Darwin32_Altivec_RegMask : - CSR_Darwin32_RegMask); - - return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? - CSR_SVR464_Altivec_RegMask : - CSR_SVR464_RegMask) : - (Subtarget.hasAltivec() ? - CSR_SVR432_Altivec_RegMask : - CSR_SVR432_RegMask); + return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask + : CSR_Darwin64_RegMask) + : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask + : CSR_Darwin32_RegMask); + + return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask + : CSR_SVR464_RegMask) + : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask + : CSR_SVR432_RegMask); } const uint32_t* @@ -160,15 +158,13 @@ PPCRegisterInfo::getNoPreservedMask() const { } void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { - unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM }; - for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) { - unsigned Reg = PseudoRegs[i]; - Mask[Reg / 32] &= ~(1u << (Reg % 32)); - } + for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM}) + Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32)); } BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); @@ -207,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } // On PPC64, r13 is the thread pointer. Never allocate this register. - if (Subtarget.isPPC64()) { + if (TM.isPPC64()) { Reserved.set(PPC::R13); Reserved.set(PPC::X1); @@ -238,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R31); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && + TM.getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R29); else Reserved.set(PPC::R30); } - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + if (Subtarget.isSVR4ABI() && !TM.isPPC64() && + TM.getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -260,6 +256,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); const unsigned DefaultSafety = 1; @@ -291,8 +288,10 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass( - const TargetRegisterClass *RC) const { +const TargetRegisterClass * +PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (Subtarget.hasVSX()) { // With VSX, we can inflate various sub-register classes to the full VSX // register set. @@ -303,7 +302,7 @@ const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass( return &PPC::VSRCRegClass; } - return TargetRegisterInfo::getLargestLegalSuperClass(RC); + return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF); } //===----------------------------------------------------------------------===// @@ -326,10 +325,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { MachineFunction &MF = *MBB.getParent(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // Get the instruction info. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); // Determine whether 64-bit pointers are used. - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); DebugLoc dl = MI.getDebugLoc(); // Get the maximum call stack size. @@ -443,10 +443,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -487,10 +488,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -522,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } -static unsigned getCRFromCRBit(unsigned SrcReg) { - unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - assert(Reg != 0 && "Invalid CR bit register"); - return Reg; -} - void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. @@ -560,10 +531,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -603,10 +575,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); - bool LP64 = Subtarget.isPPC64(); + bool LP64 = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -650,6 +623,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -675,6 +649,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -697,14 +672,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 // is arbitrary and will be subsequently ignored. For 32-bit, we have // previously created the stack slot if needed, so return its FrameIdx. if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) + if (TM.isPPC64()) FrameIdx = 0; else { const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -757,6 +732,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); // Get the basic block's function. MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); // Get the instruction info. const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); // Get the frame info. @@ -847,7 +823,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - bool is64Bit = Subtarget.isPPC64(); + bool is64Bit = TM.isPPC64(); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; @@ -885,23 +861,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); - if (!Subtarget.isPPC64()) + if (!TM.isPPC64()) return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; else return TFI->hasFP(MF) ? PPC::X31 : PPC::X1; } unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); if (!hasBasePointer(MF)) return getFrameRegister(MF); - if (Subtarget.isPPC64()) + if (TM.isPPC64()) return PPC::X30; if (Subtarget.isSVR4ABI() && - MF.getTarget().getRelocationModel() == Reloc::PIC_) + TM.getRelocationModel() == Reloc::PIC_) return PPC::R29; return PPC::R30; @@ -927,6 +905,7 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { } bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment(); @@ -964,7 +943,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); unsigned StackEst = @@ -983,7 +962,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The frame pointer will point to the end of the stack, so estimate the // offset as the difference between the object offset and the FP location. - return !isFrameOffsetLegal(MI, Offset); + return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset); } /// Insert defining instruction(s) for BaseReg to @@ -992,7 +971,7 @@ void PPCRegisterInfo:: materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const { - unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI; MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -1000,6 +979,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); const MachineFunction &MF = *MBB->getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); @@ -1025,6 +1005,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); const MCInstrDesc &MCID = MI.getDesc(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1033,6 +1014,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + unsigned BaseReg, int64_t Offset) const { unsigned FIOperandNum = 0; while (!MI->getOperand(FIOperandNum).isFI()) { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 4c2ef90..d304e1d 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -22,15 +22,44 @@ #include "PPCGenRegisterInfo.inc" namespace llvm { -class PPCSubtarget; -class TargetInstrInfo; -class Type; + +inline static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap<unsigned, unsigned> ImmToIdxMap; - const PPCSubtarget &Subtarget; + const PPCTargetMachine &TM; public: - PPCRegisterInfo(const PPCSubtarget &SubTarget); + PPCRegisterInfo(const PPCTargetMachine &TM); /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. @@ -40,13 +69,14 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; + const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; /// Code Generation virtual methods... - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; void adjustStackMapLiveOutMask(uint32_t *Mask) const override; @@ -97,7 +127,7 @@ public: int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, + bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, int64_t Offset) const override; // Debug information queries. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 9a7df96..6ca68ed 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32, def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4)>; +def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>; + // The CTR registers are not allocatable because they're used by the // decrement-and-branch instructions, and thus need to stay live across // multiple basic blocks. diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 2f3a1f9..d0954a1 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -124,401 +124,3 @@ include "PPCScheduleP8.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" - -//===----------------------------------------------------------------------===// -// Instruction to itinerary class map - When add new opcodes to the supported -// set, refer to the following table to determine which itinerary class the -// opcode belongs. -// -// opcode itinerary class -// ====== =============== -// add IIC_IntSimple -// addc IIC_IntGeneral -// adde IIC_IntGeneral -// addi IIC_IntSimple -// addic IIC_IntGeneral -// addic. IIC_IntGeneral -// addis IIC_IntSimple -// addme IIC_IntGeneral -// addze IIC_IntGeneral -// and IIC_IntSimple -// andc IIC_IntSimple -// andi. IIC_IntGeneral -// andis. IIC_IntGeneral -// b IIC_BrB -// bc IIC_BrB -// bcctr IIC_BrB -// bclr IIC_BrB -// cmp IIC_IntCompare -// cmpi IIC_IntCompare -// cmpl IIC_IntCompare -// cmpli IIC_IntCompare -// cntlzd IIC_IntRotateD -// cntlzw IIC_IntGeneral -// crand IIC_BrCR -// crandc IIC_BrCR -// creqv IIC_BrCR -// crnand IIC_BrCR -// crnor IIC_BrCR -// cror IIC_BrCR -// crorc IIC_BrCR -// crxor IIC_BrCR -// dcba IIC_LdStDCBA -// dcbf IIC_LdStDCBF -// dcbi IIC_LdStDCBI -// dcbst IIC_LdStDCBF -// dcbt IIC_LdStLoad -// dcbtst IIC_LdStLoad -// dcbz IIC_LdStDCBF -// divd IIC_IntDivD -// divdu IIC_IntDivD -// divw IIC_IntDivW -// divwu IIC_IntDivW -// dss IIC_LdStDSS -// dst IIC_LdStDSS -// dstst IIC_LdStDSS -// eciwx IIC_LdStLoad -// ecowx IIC_LdStLoad -// eieio IIC_LdStLoad -// eqv IIC_IntSimple -// extsb IIC_IntSimple -// extsh IIC_IntSimple -// extsw IIC_IntSimple -// fabs IIC_FPGeneral -// fadd IIC_FPAddSub -// fadds IIC_FPGeneral -// fcfid IIC_FPGeneral -// fcmpo IIC_FPCompare -// fcmpu IIC_FPCompare -// fctid IIC_FPGeneral -// fctidz IIC_FPGeneral -// fctiw IIC_FPGeneral -// fctiwz IIC_FPGeneral -// fdiv IIC_FPDivD -// fdivs IIC_FPDivS -// fmadd IIC_FPFused -// fmadds IIC_FPGeneral -// fmr IIC_FPGeneral -// fmsub IIC_FPFused -// fmsubs IIC_FPGeneral -// fmul IIC_FPFused -// fmuls IIC_FPGeneral -// fnabs IIC_FPGeneral -// fneg IIC_FPGeneral -// fnmadd IIC_FPFused -// fnmadds IIC_FPGeneral -// fnmsub IIC_FPFused -// fnmsubs IIC_FPGeneral -// fres IIC_FPRes -// frsp IIC_FPGeneral -// frsqrte IIC_FPGeneral -// fsel IIC_FPGeneral -// fsqrt IIC_FPSqrtD -// fsqrts IIC_FPSqrtS -// fsub IIC_FPAddSub -// fsubs IIC_FPGeneral -// icbi IIC_LdStICBI -// isel IIC_IntISEL -// isync IIC_SprISYNC -// lbz IIC_LdStLoad -// lbzu IIC_LdStLoadUpd -// lbzux IIC_LdStLoadUpdX -// lbzx IIC_LdStLoad -// ld IIC_LdStLD -// ldarx IIC_LdStLDARX -// ldu IIC_LdStLDU -// ldux IIC_LdStLDUX -// ldx IIC_LdStLD -// lfd IIC_LdStLFD -// lfdu IIC_LdStLFDU -// lfdux IIC_LdStLFDUX -// lfdx IIC_LdStLFD -// lfs IIC_LdStLFD -// lfsu IIC_LdStLFDU -// lfsux IIC_LdStLFDUX -// lfsx IIC_LdStLFD -// lha IIC_LdStLHA -// lhau IIC_LdStLHAU -// lhaux IIC_LdStLHAUX -// lhax IIC_LdStLHA -// lhbrx IIC_LdStLoad -// lhz IIC_LdStLoad -// lhzu IIC_LdStLoadUpd -// lhzux IIC_LdStLoadUpdX -// lhzx IIC_LdStLoad -// lmw IIC_LdStLMW -// lswi IIC_LdStLMW -// lswx IIC_LdStLMW -// lvebx IIC_LdStLVecX -// lvehx IIC_LdStLVecX -// lvewx IIC_LdStLVecX -// lvsl IIC_LdStLVecX -// lvsr IIC_LdStLVecX -// lvx IIC_LdStLVecX -// lvxl IIC_LdStLVecX -// lwa IIC_LdStLWA -// lwarx IIC_LdStLWARX -// lwaux IIC_LdStLHAUX -// lwax IIC_LdStLHA -// lwbrx IIC_LdStLoad -// lwz IIC_LdStLoad -// lwzu IIC_LdStLoadUpd -// lwzux IIC_LdStLoadUpdX -// lwzx IIC_LdStLoad -// mcrf IIC_BrMCR -// mcrfs IIC_FPGeneral -// mcrxr IIC_BrMCRX -// mfcr IIC_SprMFCR -// mffs IIC_IntMFFS -// mfmsr IIC_SprMFMSR -// mfspr IIC_SprMFSPR -// mfsr IIC_SprMFSR -// mfsrin IIC_SprMFSR -// mftb IIC_SprMFTB -// mfvscr IIC_IntMFVSCR -// mtcrf IIC_BrMCRX -// mtfsb0 IIC_IntMTFSB0 -// mtfsb1 IIC_IntMTFSB0 -// mtfsf IIC_IntMTFSB0 -// mtfsfi IIC_IntMTFSB0 -// mtmsr IIC_SprMTMSR -// mtmsrd IIC_LdStLD -// mtspr IIC_SprMTSPR -// mtsr IIC_SprMTSR -// mtsrd IIC_IntMTSRD -// mtsrdin IIC_IntMTSRD -// mtsrin IIC_SprMTSRIN -// mtvscr IIC_IntMFVSCR -// mulhd IIC_IntMulHD -// mulhdu IIC_IntMulHD -// mulhw IIC_IntMulHW -// mulhwu IIC_IntMulHWU -// mulld IIC_IntMulHD -// mulli IIC_IntMulLI -// mullw IIC_IntMulHW -// nand IIC_IntSimple -// neg IIC_IntSimple -// nor IIC_IntSimple -// or IIC_IntSimple -// orc IIC_IntSimple -// ori IIC_IntSimple -// oris IIC_IntSimple -// rfi IIC_SprRFI -// rfid IIC_IntRFID -// rldcl IIC_IntRotateD -// rldcr IIC_IntRotateD -// rldic IIC_IntRotateDI -// rldicl IIC_IntRotateDI -// rldicr IIC_IntRotateDI -// rldimi IIC_IntRotateDI -// rlwimi IIC_IntRotate -// rlwinm IIC_IntGeneral -// rlwnm IIC_IntGeneral -// sc IIC_SprSC -// slbia IIC_LdStSLBIA -// slbie IIC_LdStSLBIE -// sld IIC_IntRotateD -// slw IIC_IntGeneral -// srad IIC_IntRotateD -// sradi IIC_IntRotateDI -// sraw IIC_IntShift -// srawi IIC_IntShift -// srd IIC_IntRotateD -// srw IIC_IntGeneral -// stb IIC_LdStStore -// stbu IIC_LdStStoreUpd -// stbux IIC_LdStStoreUpd -// stbx IIC_LdStStore -// std IIC_LdStSTD -// stdcx. IIC_LdStSTDCX -// stdu IIC_LdStSTDU -// stdux IIC_LdStSTDUX -// stdx IIC_LdStSTD -// stfd IIC_LdStSTFD -// stfdu IIC_LdStSTFDU -// stfdux IIC_LdStSTFDU -// stfdx IIC_LdStSTFD -// stfiwx IIC_LdStSTFD -// stfs IIC_LdStSTFD -// stfsu IIC_LdStSTFDU -// stfsux IIC_LdStSTFDU -// stfsx IIC_LdStSTFD -// sth IIC_LdStStore -// sthbrx IIC_LdStStore -// sthu IIC_LdStStoreUpd -// sthux IIC_LdStStoreUpd -// sthx IIC_LdStStore -// stmw IIC_LdStLMW -// stswi IIC_LdStLMW -// stswx IIC_LdStLMW -// stvebx IIC_LdStSTVEBX -// stvehx IIC_LdStSTVEBX -// stvewx IIC_LdStSTVEBX -// stvx IIC_LdStSTVEBX -// stvxl IIC_LdStSTVEBX -// stw IIC_LdStStore -// stwbrx IIC_LdStStore -// stwcx. IIC_LdStSTWCX -// stwu IIC_LdStStoreUpd -// stwux IIC_LdStStoreUpd -// stwx IIC_LdStStore -// subf IIC_IntGeneral -// subfc IIC_IntGeneral -// subfe IIC_IntGeneral -// subfic IIC_IntGeneral -// subfme IIC_IntGeneral -// subfze IIC_IntGeneral -// sync IIC_LdStSync -// td IIC_IntTrapD -// tdi IIC_IntTrapD -// tlbia IIC_LdStSLBIA -// tlbie IIC_LdStDCBF -// tlbsync IIC_SprTLBSYNC -// tw IIC_IntTrapW -// twi IIC_IntTrapW -// vaddcuw IIC_VecGeneral -// vaddfp IIC_VecFP -// vaddsbs IIC_VecGeneral -// vaddshs IIC_VecGeneral -// vaddsws IIC_VecGeneral -// vaddubm IIC_VecGeneral -// vaddubs IIC_VecGeneral -// vadduhm IIC_VecGeneral -// vadduhs IIC_VecGeneral -// vadduwm IIC_VecGeneral -// vadduws IIC_VecGeneral -// vand IIC_VecGeneral -// vandc IIC_VecGeneral -// vavgsb IIC_VecGeneral -// vavgsh IIC_VecGeneral -// vavgsw IIC_VecGeneral -// vavgub IIC_VecGeneral -// vavguh IIC_VecGeneral -// vavguw IIC_VecGeneral -// vcfsx IIC_VecFP -// vcfux IIC_VecFP -// vcmpbfp IIC_VecFPCompare -// vcmpeqfp IIC_VecFPCompare -// vcmpequb IIC_VecGeneral -// vcmpequh IIC_VecGeneral -// vcmpequw IIC_VecGeneral -// vcmpgefp IIC_VecFPCompare -// vcmpgtfp IIC_VecFPCompare -// vcmpgtsb IIC_VecGeneral -// vcmpgtsh IIC_VecGeneral -// vcmpgtsw IIC_VecGeneral -// vcmpgtub IIC_VecGeneral -// vcmpgtuh IIC_VecGeneral -// vcmpgtuw IIC_VecGeneral -// vctsxs IIC_VecFP -// vctuxs IIC_VecFP -// vexptefp IIC_VecFP -// vlogefp IIC_VecFP -// vmaddfp IIC_VecFP -// vmaxfp IIC_VecFPCompare -// vmaxsb IIC_VecGeneral -// vmaxsh IIC_VecGeneral -// vmaxsw IIC_VecGeneral -// vmaxub IIC_VecGeneral -// vmaxuh IIC_VecGeneral -// vmaxuw IIC_VecGeneral -// vmhaddshs IIC_VecComplex -// vmhraddshs IIC_VecComplex -// vminfp IIC_VecFPCompare -// vminsb IIC_VecGeneral -// vminsh IIC_VecGeneral -// vminsw IIC_VecGeneral -// vminub IIC_VecGeneral -// vminuh IIC_VecGeneral -// vminuw IIC_VecGeneral -// vmladduhm IIC_VecComplex -// vmrghb IIC_VecPerm -// vmrghh IIC_VecPerm -// vmrghw IIC_VecPerm -// vmrglb IIC_VecPerm -// vmrglh IIC_VecPerm -// vmrglw IIC_VecPerm -// vmsubfp IIC_VecFP -// vmsummbm IIC_VecComplex -// vmsumshm IIC_VecComplex -// vmsumshs IIC_VecComplex -// vmsumubm IIC_VecComplex -// vmsumuhm IIC_VecComplex -// vmsumuhs IIC_VecComplex -// vmulesb IIC_VecComplex -// vmulesh IIC_VecComplex -// vmuleub IIC_VecComplex -// vmuleuh IIC_VecComplex -// vmulosb IIC_VecComplex -// vmulosh IIC_VecComplex -// vmuloub IIC_VecComplex -// vmulouh IIC_VecComplex -// vnor IIC_VecGeneral -// vor IIC_VecGeneral -// vperm IIC_VecPerm -// vpkpx IIC_VecPerm -// vpkshss IIC_VecPerm -// vpkshus IIC_VecPerm -// vpkswss IIC_VecPerm -// vpkswus IIC_VecPerm -// vpkuhum IIC_VecPerm -// vpkuhus IIC_VecPerm -// vpkuwum IIC_VecPerm -// vpkuwus IIC_VecPerm -// vrefp IIC_VecFPRound -// vrfim IIC_VecFPRound -// vrfin IIC_VecFPRound -// vrfip IIC_VecFPRound -// vrfiz IIC_VecFPRound -// vrlb IIC_VecGeneral -// vrlh IIC_VecGeneral -// vrlw IIC_VecGeneral -// vrsqrtefp IIC_VecFP -// vsel IIC_VecGeneral -// vsl IIC_VecVSL -// vslb IIC_VecGeneral -// vsldoi IIC_VecPerm -// vslh IIC_VecGeneral -// vslo IIC_VecPerm -// vslw IIC_VecGeneral -// vspltb IIC_VecPerm -// vsplth IIC_VecPerm -// vspltisb IIC_VecPerm -// vspltish IIC_VecPerm -// vspltisw IIC_VecPerm -// vspltw IIC_VecPerm -// vsr IIC_VecVSR -// vsrab IIC_VecGeneral -// vsrah IIC_VecGeneral -// vsraw IIC_VecGeneral -// vsrb IIC_VecGeneral -// vsrh IIC_VecGeneral -// vsro IIC_VecPerm -// vsrw IIC_VecGeneral -// vsubcuw IIC_VecGeneral -// vsubfp IIC_VecFP -// vsubsbs IIC_VecGeneral -// vsubshs IIC_VecGeneral -// vsubsws IIC_VecGeneral -// vsububm IIC_VecGeneral -// vsububs IIC_VecGeneral -// vsubuhm IIC_VecGeneral -// vsubuhs IIC_VecGeneral -// vsubuwm IIC_VecGeneral -// vsubuws IIC_VecGeneral -// vsum2sws IIC_VecComplex -// vsum4sbs IIC_VecComplex -// vsum4shs IIC_VecComplex -// vsum4ubs IIC_VecComplex -// vsumsws IIC_VecComplex -// vupkhpx IIC_VecPerm -// vupkhsb IIC_VecPerm -// vupkhsh IIC_VecPerm -// vupklpx IIC_VecPerm -// vupklsb IIC_VecPerm -// vupklsh IIC_VecPerm -// vxor IIC_VecGeneral -// xor IIC_IntSimple -// xori IIC_IntSimple -// xoris IIC_IntSimple -// diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c91428d..ed88803 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() { HasVSX = false; HasP8Vector = false; HasP8Altivec = false; + HasP8Crypto = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -94,7 +95,9 @@ void PPCSubtarget::initializeEnvironment() { HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; + HasPartwordAtomics = false; IsQPXStackUnaligned = false; + HasHTM = false; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 247a96d..b4c1bb1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -90,6 +90,7 @@ protected: bool HasVSX; bool HasP8Vector; bool HasP8Altivec; + bool HasP8Crypto; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -113,6 +114,8 @@ protected: bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; + bool HasPartwordAtomics; + bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -218,6 +221,7 @@ public: bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } + bool hasP8Crypto() const { return HasP8Crypto; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } @@ -234,6 +238,7 @@ public: bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } + bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { @@ -242,6 +247,7 @@ public: return 16; } + bool hasHTM() const { return HasHTM; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b219e93..7267529 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -160,11 +160,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, - CM, OL), + : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU, + computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), TLOF(createTLOF(Triple(getTargetTriple()))), - TargetABI(computeTargetABI(Triple(TT), Options)), - DL(getDataLayoutString(Triple(TT))), Subtarget(TT, CPU, TargetFS, *this) { + TargetABI(computeTargetABI(Triple(TT), Options)) { initAsmInfo(); } @@ -208,7 +207,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this); + I = llvm::make_unique<PPCSubtarget>( + TargetTriple, CPU, + // FIXME: It would be good to have the subtarget additions here + // not necessary. Anything that turns them on/off (overrides) ends + // up being put at the end of the feature string, but the defaults + // shouldn't require adding them. Fixing this means pulling Feature64Bit + // out of most of the target cpus in the .td file and making it set only + // as part of initialization via the TargetTriple. + computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); } return I.get(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 6508484..7a49058 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,10 +29,6 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; - // Calculates type size & alignment - const DataLayout DL; - PPCSubtarget Subtarget; - mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: @@ -42,8 +38,6 @@ public: ~PPCTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } const PPCSubtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 073bbb0..b46acd4 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, BaseT::getUnrollingPreferences(L, UP); } +bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { + return LoopHasReductions; +} + unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { if (Vector && !ST->hasAltivec() && !ST->hasQPX()) return 0; diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index cef7079..21acea1 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -78,6 +78,7 @@ public: /// \name Vector TTI Implementations /// @{ + bool enableAggressiveInterleaving(bool LoopHasReductions); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 4132b04..dfe988f 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -621,3 +621,10 @@ void foo() { bar(x); __asm__("" ::: "cr2"); } + +//===----------------------------------------------------------------------===// + +Instruction fusion was introduced in ISA 2.06 and more opportunities added in +ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities +and force instruction pairs to be scheduled together. + diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 1e4c6fb..43d87d3 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -209,3 +209,107 @@ vector float f(vector float a, vector float b) { return b; } +//===----------------------------------------------------------------------===// + +We should do a little better with eliminating dead stores. +The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 + store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# BB#0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, .LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. + +//===----------------------------------------------------------------------===// + +The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: + +define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = add <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result + +This will generate the following instruction sequence: + std 5, -8(1) + std 5, -16(1) + addi 3, 1, -16 + ori 2, 2, 0 + lxvd2x 35, 0, 3 + vaddudm 2, 2, 3 + blr + +This will almost certainly cause a load-hit-store hazard. +Since val is a value parameter, it should not need to be saved onto +the stack, unless it's being done set up the vector register. Instead, +it would be better to splat teh value into a vector register, and then +remove the (dead) stores to the stack. + +//===----------------------------------------------------------------------===// + +At the moment we always generate a lxsdx in preference to lfd, or stxsdx in +preference to stfd. When we have a reg-immediate addressing mode, this is a +poor choice, since we have to load the address into an index register. This +should be fixed for P7/P8. + +//===----------------------------------------------------------------------===// + +Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. +However, we could actually support both kinds on either endianness, if we check +for the appropriate shufflevector pattern for each case ... this would cause +some additional shufflevectors to be recognized and implemented via the +"swapped" form. + +//===----------------------------------------------------------------------===// + +There is a utility program called PerfectShuffle that generates a table of the +shortest instruction sequence for implementing a shufflevector operation on +PowerPC. However, this was designed for big-endian code generation. We could +modify this program to create a little endian version of the table. The table +is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). + +//===----------------------------------------------------------------------===// + +Opportunies to use instructions from PPCInstrVSX.td during code gen + - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) + - Scalar comparisons (xscmpodp and xscmpudp) + - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) + +Related to this: we currently do not generate the lxvw4x instruction for either +v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires +a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. |