diff options
Diffstat (limited to 'lib/Target/PowerPC')
28 files changed, 438 insertions, 205 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 99a1633..90ab7a5 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::RLWINMbm: + case PPC::RLWINMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWIMIbm: + case PPC::RLWIMIobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); // The tied operand. + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWNMbm: + case PPC::RLWNMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? 
PPC::RLWNM : PPC::RLWNMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } } } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index a9f5fc7..5cbf3d9 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler { public: PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~PPCDisassembler() {} + ~PPCDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 311a4f2..1576544 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { // Check for slwi/srwi mnemonics. 
if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8718743..eca37eb 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,8 @@ public: } void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index bea88a2..420c5c8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -208,7 +208,7 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -224,8 +224,7 @@ namespace { ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index b817394..3e3489f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -412,7 +412,7 @@ bool 
PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, bool IsLittleEndian, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b9f0afb..725b47b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -44,7 +44,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - ~PPCMCCodeEmitter() {} + ~PPCMCCodeEmitter() override {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2f7a768..423e427 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new PPCTargetMachOStreamer(S); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, +static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin(); - return new PPCInstPrinter(MAI, MII, MRI, isDarwin); + const MCRegisterInfo &MRI) { + return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin()); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 8b1e3b4..5f2117c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -18,6 +18,7 @@ #undef PPC #include 
"llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" namespace llvm { class MCAsmBackend; @@ -29,6 +30,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_pwrite_stream; class raw_ostream; extern Target ThePPC32Target; @@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -/// createPPCELFObjectWriter - Construct an PPC ELF object writer. -MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - bool IsLittleEndian, - uint8_t OSABI); -/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. -MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, +/// Construct a PPC ELF object writer. +MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI); +/// Construct a PPC Mach-O object writer. +MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); + +/// Returns true iff Val consists of one contiguous run of 1s with any number of +/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so +/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not, +/// since all 1s are not contiguous. 
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { + if (!Val) + return false; + + if (isShiftedMask_32(Val)) { + // look for the first non-zero bit + MB = countLeadingZeros(Val); + // look for the first zero bit after the run of ones + ME = countLeadingZeros((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_32(Val)) { + // effectively look for the first zero bit + ME = countLeadingZeros(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = countLeadingZeros((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + } // End llvm namespace // Generated files will use "namespace PPC". To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9..44e69b7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter( new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f175f6d..1a02bcc 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; +def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", + "Enable the bpermd instruction">; +def FeatureExtDiv : 
SubtargetFeature<"extdiv", "HasExtDiv", "true", + "Enable extended divide instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", @@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; +def FeatureDirectMove : + SubtargetFeature<"direct-move", "HasDirectMove", "true", + "Enable Power8 direct move instructions", + [FeatureVSX]>; def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", "HasPartwordAtomics", "true", "Enable l[bh]arx and st[bh]cx.">; @@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; +/* Since new processors generally contain a superset of features of those that + came before them, the idea is to make implementations of new processors + less error prone and easier to read. + Namely: + list<SubtargetFeature> Power8FeatureList = ... + list<SubtargetFeature> FutureProcessorSpecificFeatureList = + [ features that Power8 does not support ] + list<SubtargetFeature> FutureProcessorFeatureList = + !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + + Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as + well as providing a single point of definition if the feature set will be + used elsewhere. 
+*/ +def ProcessorFeatures { + list<SubtargetFeature> Power7FeatureList = + [DirectivePwr7, FeatureAltivec, FeatureVSX, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + FeatureBPERMD, FeatureExtDiv, + DeprecatedMFTB, DeprecatedDST]; + list<SubtargetFeature> Power8SpecificFeatures = + [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + list<SubtargetFeature> Power8FeatureList = + !listconcat(Power7FeatureList, Power8SpecificFeatures); +} + // Note: Future features to add when support is extended to more // recent ISA levels: // @@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE]>; -/* Since new processors generally contain a superset of features of those that - came before them, the idea is to make implementations of new processors - less error prone and easier to read. - Namely: - list<SubtargetFeature> Power8FeatureList = ... - list<SubtargetFeature> FutureProcessorSpecificFeatureList = - [ features that Power8 does not support ] - list<SubtargetFeature> FutureProcessorFeatureList = - !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) - - Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as - well as providing a single point of definition if the feature set will be - used elsewhere. 
- -*/ -def ProcessorFeatures { - list<SubtargetFeature> Power8FeatureList = - [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, - FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, - FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, - Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; -} - def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, @@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, - DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index cd60906..383a1e2 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } - MachineModuleInfoELF &MMIELF = - MMI->getObjFileInfo<MachineModuleInfoELF>(); - - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - 
OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - return AsmPrinter::doFinalization(M); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fbd7b6d..002616b 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } // Attempt to fast-select an integer-to-floating-point conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); @@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, } // Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); @@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; // FIXME: No multi-register return values yet. 
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3ac8e94..4f8d01b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -105,13 +105,6 @@ namespace { return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } - /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s - /// with any number of 0s on either side. The 1s are allowed to wrap from - /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. - /// 0x0F0F0000 is not, since all 1s are not contiguous. - static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); - - /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, @@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { getSmallIPtrImm(Offset)); } -bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { - if (!Val) - return false; - - if (isShiftedMask_32(Val)) { - // look for the first non-zero bit - MB = countLeadingZeros(Val); - // look for the first zero bit after the run of ones - ME = countLeadingZeros((Val - 1) ^ Val); - return true; - } else { - Val = ~Val; // invert mask - if (isShiftedMask_32(Val)) { - // effectively look for the first zero bit - ME = countLeadingZeros(Val) - 1; - // effectively look for the first one bit after the run of zeros - MB = countLeadingZeros((Val - 1) ^ Val) + 1; - return true; - } - } - // no run present - return false; -} - bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 871531e..4c0b6a6 100644 --- 
a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; + case PPCISD::MFVSR: return "PPCISD::MFVSR"; + case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; + case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. -bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, + DAG.getConstant(12, MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.MPI = MPI; } +/// \brief Custom lowers floating point to integer conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode( + Op.getOpcode() == ISD::FP_TO_SINT + ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); + break; + case MVT::i64: + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); + break; + } + return Tmp; +} + SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { + if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) + return LowerFP_TO_INTDirectMove(Op, DAG, dl); + ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); @@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Custom lowers integer to floating point conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert((Op.getValueType() == MVT::f32 || + Op.getValueType() == MVT::f64) && + "Invalid floating point type as target of conversion"); + assert(Subtarget.hasFPCVT() && + "Int to FP conversions with direct moves require FPCVT"); + SDValue FP; + SDValue Src = Op.getOperand(0); + bool SinglePrec = Op.getValueType() == MVT::f32; + bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; + unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : + (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); + + if (WordInt) { + FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, + dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + else { + FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? 
MVT::f32 : MVT::f64, FP); + } + + return FP; +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getConstantFP(1.0, Op.getValueType()), DAG.getConstantFP(0.0, Op.getValueType())); + // If we have direct moves, we can do all the conversion, skip the store/load + // however, without FPCVT we can't do most conversions. + if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + return LowerINT_TO_FPDirectMove(Op, DAG, dl); + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. 
- - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; @@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); - // When expanding a memset, require at least two QPX instructions to cover - // the cost of loading the value to be stored from the constant pool. - if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && - (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { - return MVT::v4f64; - } - - // We should use Altivec/VSX loads and stores when available. For unaligned - // addresses, unaligned VSX loads are only fast starting with the P8. - if (Subtarget.hasAltivec() && Size >= 16 && - (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || - ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) - return MVT::v4i32; + if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. 
For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. + if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + } if (Subtarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8afd7ef..7e2ebd4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -119,6 +119,15 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + /// Direct move from a VSX register to a GPR + MFVSR, + + /// Direct move from a GPR to a VSX register (algebraic) + MTVSRA, + + /// Direct move from a GPR to a VSX register (zero) + MTVSRZ, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to @@ -368,10 +377,6 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - /// isAllNegativeZeroVector - Returns true if all elements of build_vector - /// are -0.0. - bool isAllNegativeZeroVector(SDNode *N); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); @@ -649,6 +654,10 @@ namespace llvm { void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, SDLoc dl) const; + SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; + SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 183d088..d1d67cb 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; +def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "bpermd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>, + isPPC64, Requires<[HasBPERMD]>; let isCodeGenOnly = 1, isCommutable = 1 in def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), @@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; -defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (sdiv i64:$rA, 
i64:$rB))]>, isPPC64; +defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64; +def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "mulld", "$rT, $rA, $rB", IIC_IntMulHD, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b7a7a1f..43c2158 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 
5eff156..8aecb65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; +def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; +def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, } } +// Multiclass for instructions for which the non record form is not cracked +// and the record form is cracked (i.e. divw, mullw, etc.) +multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel, PPC970_DGroup_First, + PPC970_DGroup_Cracked; + } +} + multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; -defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; +defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; +def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu. 
$rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0) def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; + // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. 
let PPC970_Unit = 7 in { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ec04da4..a98e58f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), when the elements are larger than i32. */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
let isCommutable = 1 in { @@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; } // AddedComplexity = 500 } // HasP8Vector + +let Predicates = [HasDirectMove, HasVSX] in { +// VSX direct move instructions +def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; +def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; +def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; +def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; +def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove, HasVSX diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index 005bcaf..2947c66 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "ppc-loop-data-prefetch" #include "PPC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" @@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } diff 
--git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4ef..b6e7799 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } @@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. 
typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; @@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa<SCEVAddRecExpr>(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet<BasicBlock *, 16> BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? 
MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 0965cb3..6df89fe 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); // If the target flags on the operand changes the name of the symbol, do that diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ed88803..f313b0a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include <cstdlib> @@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() { HasFPCVT = false; HasISEL = false; HasPOPCNTD = false; + HasBPERMD = false; + HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; @@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() { HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; + HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; } @@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else CPUName = "generic"; } -#if (defined(__APPLE__) || defined(__linux__)) && \ - (defined(__ppc__) || defined(__powerpc__)) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b4c1bb1..8d95508 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,6 +101,8 @@ protected: bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; + bool HasBPERMD; + bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; @@ -115,6 +117,7 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; + bool HasDirectMove; bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack @@ -225,6 +228,8 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasBPERMD() const { return HasBPERMD; } + bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } @@ -239,6 +244,7 @@ public: return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } + bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 6493713..8aaf5e1 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: PPCTargetStreamer(MCStreamer &S); - virtual ~PPCTargetStreamer(); + ~PPCTargetStreamer() override; virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index dfe988f..01233ae 100644 --- a/lib/Target/PowerPC/README.txt +++ 
b/lib/Target/PowerPC/README.txt @@ -622,6 +622,25 @@ void foo() { __asm__("" ::: "cr2"); } +//===-------------------------------------------------------------------------=== +Naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) +OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need to denote reserved fields in names of instruction formats. + //===----------------------------------------------------------------------===// Instruction fusion was introduced in ISA 2.06 and more opportunities added in diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 43d87d3..1d5b092 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -277,7 +277,7 @@ This will generate the following instruction sequence: This will almost certainly cause a load-hit-store hazard. Since val is a value parameter, it should not need to be saved onto the stack, unless it's being done set up the vector register. Instead, -it would be better to splat teh value into a vector register, and then +it would be better to splat the value into a vector register, and then remove the (dead) stores to the stack. //===----------------------------------------------------------------------===// |