diff options
Diffstat (limited to 'lib/Target/PowerPC')
60 files changed, 3738 insertions, 2405 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 2f562ca..06bb968 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -213,16 +214,12 @@ struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; - MCAsmParser &Parser; const MCInstrInfo &MII; bool IsPPC64; bool IsDarwin; - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); } bool isPPC64() const { return IsPPC64; } bool isDarwin() const { return IsDarwin; } @@ -244,10 +241,12 @@ class PPCAsmParser : public MCTargetAsmParser { bool ParseDirectiveTC(unsigned Size, SMLoc L); bool ParseDirectiveMachine(SMLoc L); bool ParseDarwinDirectiveMachine(SMLoc L); + bool ParseDirectiveAbiVersion(SMLoc L); + bool ParseDirectiveLocalEntry(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; void ProcessInstruction(MCInst &Inst, const OperandVector &Ops); @@ -263,9 +262,8 @@ class PPCAsmParser : public MCTargetAsmParser { public: PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) { + const MCInstrInfo &_MII, const MCTargetOptions 
&Options) + : MCTargetAsmParser(), STI(_STI), MII(_MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -294,6 +292,7 @@ struct PPCOperand : public MCParsedAsmOperand { enum KindTy { Token, Immediate, + ContextImmediate, Expression, TLSRegister } Kind; @@ -338,6 +337,7 @@ public: Tok = o.Tok; break; case Immediate: + case ContextImmediate: Imm = o.Imm; break; case Expression: @@ -362,6 +362,16 @@ public: assert(Kind == Immediate && "Invalid access!"); return Imm.Val; } + int64_t getImmS16Context() const { + assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!"); + if (Kind == Immediate) + return Imm.Val; + return static_cast<int16_t>(Imm.Val); + } + int64_t getImmU16Context() const { + assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!"); + return Imm.Val; + } const MCExpr *getExpr() const { assert(Kind == Expression && "Invalid access!"); @@ -406,22 +416,73 @@ public: bool isToken() const override { return Kind == Token; } bool isImm() const override { return Kind == Immediate || Kind == Expression; } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } + bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } - bool isU16Imm() const { return Kind == Expression || - (Kind == Immediate && isUInt<16>(getImm())); } - bool isS16Imm() const { return Kind == Expression || - (Kind == Immediate && isInt<16>(getImm())); } + bool isU6ImmX2() const { return Kind == Immediate && + isUInt<6>(getImm()) && + (getImm() & 1) == 0; } + bool isU7ImmX4() const { return Kind == Immediate && + isUInt<7>(getImm()) && + (getImm() & 3) == 0; } + bool isU8ImmX8() const { return Kind == Immediate 
&& + isUInt<8>(getImm()) && + (getImm() & 7) == 0; } + bool isU16Imm() const { + switch (Kind) { + case Expression: + return true; + case Immediate: + case ContextImmediate: + return isUInt<16>(getImmU16Context()); + default: + return false; + } + } + bool isS16Imm() const { + switch (Kind) { + case Expression: + return true; + case Immediate: + case ContextImmediate: + return isInt<16>(getImmS16Context()); + default: + return false; + } + } bool isS16ImmX4() const { return Kind == Expression || (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } - bool isS17Imm() const { return Kind == Expression || - (Kind == Immediate && isInt<17>(getImm())); } + bool isS17Imm() const { + switch (Kind) { + case Expression: + return true; + case Immediate: + case ContextImmediate: + return isInt<17>(getImmS16Context()); + default: + return false; + } + } bool isTLSReg() const { return Kind == TLSRegister; } - bool isDirectBr() const { return Kind == Expression || - (Kind == Immediate && isInt<26>(getImm()) && - (getImm() & 3) == 0); } + bool isDirectBr() const { + if (Kind == Expression) + return true; + if (Kind != Immediate) + return false; + // Operand must be 64-bit aligned, signed 27-bit immediate. + if ((getImm() & 3) != 0) + return false; + if (isInt<26>(getImm())) + return true; + if (!IsPPC64) { + // In 32-bit mode, large 32-bit quantities wrap around. 
+ if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm()))) + return true; + } + return false; + } bool isCondBr() const { return Kind == Expression || (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } @@ -526,6 +587,36 @@ public: Inst.addOperand(MCOperand::CreateExpr(getExpr())); } + void addS16ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + switch (Kind) { + case Immediate: + Inst.addOperand(MCOperand::CreateImm(getImm())); + break; + case ContextImmediate: + Inst.addOperand(MCOperand::CreateImm(getImmS16Context())); + break; + default: + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + break; + } + } + + void addU16ImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + switch (Kind) { + case Immediate: + Inst.addOperand(MCOperand::CreateImm(getImm())); + break; + case ContextImmediate: + Inst.addOperand(MCOperand::CreateImm(getImmU16Context())); + break; + default: + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + break; + } + } + void addBranchTargetOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); if (Kind == Immediate) @@ -566,9 +657,9 @@ public: // explicitly. 
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size()); std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token)); - Op->Tok.Data = (const char *)(Op.get() + 1); + Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1); Op->Tok.Length = Str.size(); - std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size()); + std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size()); Op->StartLoc = S; Op->EndLoc = S; Op->IsPPC64 = IsPPC64; @@ -607,6 +698,16 @@ public: } static std::unique_ptr<PPCOperand> + CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { + auto Op = make_unique<PPCOperand>(ContextImmediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static std::unique_ptr<PPCOperand> CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) { if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val)) return CreateImm(CE->getValue(), S, E, IsPPC64); @@ -615,6 +716,12 @@ public: if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS) return CreateTLSReg(SRE, S, E, IsPPC64); + if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) { + int64_t Res; + if (TE->EvaluateAsConstant(Res)) + return CreateContextImm(Res, S, E, IsPPC64); + } + return CreateExpr(Val, S, E, IsPPC64); } }; @@ -627,6 +734,7 @@ void PPCOperand::print(raw_ostream &OS) const { OS << "'" << getToken() << "'"; break; case Immediate: + case ContextImmediate: OS << getImm(); break; case Expression: @@ -638,6 +746,29 @@ void PPCOperand::print(raw_ostream &OS) const { } } +static void +addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) { + if (Op.isImm()) { + Inst.addOperand(MCOperand::CreateImm(-Op.getImm())); + return; + } + const MCExpr *Expr = Op.getExpr(); + if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) { + if (UnExpr->getOpcode() == MCUnaryExpr::Minus) { + Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr())); + return; + } + } else if (const MCBinaryExpr *BinExpr = 
dyn_cast<MCBinaryExpr>(Expr)) { + if (BinExpr->getOpcode() == MCBinaryExpr::Sub) { + const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(), + BinExpr->getLHS(), Ctx); + Inst.addOperand(MCOperand::CreateExpr(NE)); + return; + } + } + Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx))); +} + void PPCAsmParser::ProcessInstruction(MCInst &Inst, const OperandVector &Operands) { int Opcode = Inst.getOpcode(); @@ -653,41 +784,37 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, } case PPC::SUBI: { MCInst TmpInst; - int64_t N = Inst.getOperand(2).getImm(); TmpInst.setOpcode(PPC::ADDI); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); - TmpInst.addOperand(MCOperand::CreateImm(-N)); + addNegOperand(TmpInst, Inst.getOperand(2), getContext()); Inst = TmpInst; break; } case PPC::SUBIS: { MCInst TmpInst; - int64_t N = Inst.getOperand(2).getImm(); TmpInst.setOpcode(PPC::ADDIS); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); - TmpInst.addOperand(MCOperand::CreateImm(-N)); + addNegOperand(TmpInst, Inst.getOperand(2), getContext()); Inst = TmpInst; break; } case PPC::SUBIC: { MCInst TmpInst; - int64_t N = Inst.getOperand(2).getImm(); TmpInst.setOpcode(PPC::ADDIC); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); - TmpInst.addOperand(MCOperand::CreateImm(-N)); + addNegOperand(TmpInst, Inst.getOperand(2), getContext()); Inst = TmpInst; break; } case PPC::SUBICo: { MCInst TmpInst; - int64_t N = Inst.getOperand(2).getImm(); TmpInst.setOpcode(PPC::ADDICo); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); - TmpInst.addOperand(MCOperand::CreateImm(-N)); + addNegOperand(TmpInst, Inst.getOperand(2), getContext()); Inst = TmpInst; break; } @@ -921,7 +1048,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, - MCStreamer &Out, 
unsigned &ErrorInfo, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; @@ -939,7 +1066,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "unrecognized instruction mnemonic"); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -995,6 +1122,7 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { bool PPCAsmParser:: ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); StartLoc = Tok.getLoc(); EndLoc = Tok.getEndLoc(); @@ -1176,6 +1304,7 @@ ParseExpression(const MCExpr *&EVal) { /// for this to be done at a higher level. bool PPCAsmParser:: ParseDarwinExpression(const MCExpr *&EVal) { + MCAsmParser &Parser = getParser(); PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None; switch (getLexer().getKind()) { default: @@ -1218,6 +1347,7 @@ ParseDarwinExpression(const MCExpr *&EVal) { /// This handles registers in the form 'NN', '%rNN' for ELF platforms and /// rNN for MachO. bool PPCAsmParser::ParseOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); const MCExpr *EVal; @@ -1412,6 +1542,10 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveTC(isPPC64()? 
8 : 4, DirectiveID.getLoc()); if (IDVal == ".machine") return ParseDirectiveMachine(DirectiveID.getLoc()); + if (IDVal == ".abiversion") + return ParseDirectiveAbiVersion(DirectiveID.getLoc()); + if (IDVal == ".localentry") + return ParseDirectiveLocalEntry(DirectiveID.getLoc()); } else { if (IDVal == ".machine") return ParseDarwinDirectiveMachine(DirectiveID.getLoc()); @@ -1422,6 +1556,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { /// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -1446,6 +1581,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { /// ParseDirectiveTC /// ::= .tc [ symbol (, expression)* ] bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { + MCAsmParser &Parser = getParser(); // Skip TC symbol, which is only used with XCOFF. while (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().isNot(AsmToken::Comma)) @@ -1466,6 +1602,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { /// ParseDirectiveMachine (ELF platforms) /// ::= .machine [ cpu | "push" | "pop" ] bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::Identifier) && getLexer().isNot(AsmToken::String)) { Error(L, "unexpected token in directive"); @@ -1500,6 +1637,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { /// ParseDarwinDirectiveMachine (Mach-o platforms) /// ::= .machine cpu-identifier bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::Identifier) && getLexer().isNot(AsmToken::String)) { Error(L, "unexpected token in directive"); @@ -1534,6 +1672,64 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) { return false; } +/// ParseDirectiveAbiVersion +/// ::= .abiversion 
constant-expression +bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) { + int64_t AbiVersion; + if (getParser().parseAbsoluteExpression(AbiVersion)){ + Error(L, "expected constant expression"); + return false; + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitAbiVersion(AbiVersion); + + return false; +} + +/// ParseDirectiveLocalEntry +/// ::= .localentry symbol, expression +bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { + StringRef Name; + if (getParser().parseIdentifier(Name)) { + Error(L, "expected identifier in directive"); + return false; + } + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) { + Error(L, "unexpected token in directive"); + return false; + } + Lex(); + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) { + Error(L, "expected expression"); + return false; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitLocalEntry(Sym, Expr); + + return false; +} + + + /// Force static initialization. 
extern "C" void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); @@ -1558,6 +1754,10 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, case MCK_1: ImmVal = 1; break; case MCK_2: ImmVal = 2; break; case MCK_3: ImmVal = 3; break; + case MCK_4: ImmVal = 4; break; + case MCK_5: ImmVal = 5; break; + case MCK_6: ImmVal = 6; break; + case MCK_7: ImmVal = 7; break; default: return Match_InvalidOperand; } diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index ea4de63..47a9474 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel) @@ -16,7 +15,6 @@ add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen PPCAsmPrinter.cpp PPCBranchSelector.cpp - PPCCodeEmitter.cpp PPCCTRLoops.cpp PPCHazardRecognizers.cpp PPCInstrInfo.cpp @@ -24,7 +22,6 @@ add_llvm_target(PowerPCCodeGen PPCISelLowering.cpp PPCFastISel.cpp PPCFrameLowering.cpp - PPCJITInfo.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCRegisterInfo.cpp diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt index c1011ff..ea3e7ea 100644 --- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt +++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCDisassembler parent = PowerPC -required_libraries = MC PowerPCDesc PowerPCInfo Support 
+required_libraries = MCDisassembler PowerPCInfo Support add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index a2305a9..5251b60 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -12,7 +12,6 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -28,13 +27,10 @@ public: : MCDisassembler(STI, Ctx) {} virtual ~PPCDisassembler() {} - // Override MCDisassembler. - virtual DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; }; } // end anonymous namespace @@ -325,23 +321,19 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, #include "PPCGenDisassemblerTables.inc" DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { + ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS) const { // Get the four bytes of the instruction. - uint8_t Bytes[4]; Size = 4; - if (Region.readBytes(Address, Size, Bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } // The instruction is big-endian encoded. 
- uint32_t Inst = (Bytes[0] << 24) | - (Bytes[1] << 16) | - (Bytes[2] << 8) | - (Bytes[3] << 0); + uint32_t Inst = + (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0); return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 7279b09..670c40a 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOpcodes.h" @@ -207,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 15 && "Invalid u4imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); @@ -260,7 +268,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, if (!MI->getOperand(OpNo).isImm()) return printOperand(MI, OpNo, O); - O << (int)MI->getOperand(OpNo).getImm()*4; + O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); } @@ -308,10 +316,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printBranchOperand(MI, OpNo, O); + // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must + // come at the _end_ of the expression. 
+ const MCOperand &Op = MI->getOperand(OpNo); + const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr()); + O << refExp.getSymbol().getName(); O << '('; printOperand(MI, OpNo+1, O); O << ')'; + if (refExp.getKind() != MCSymbolRefExpr::VK_None) + O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 211a628..b21aa22 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCINSTPRINTER_H -#define PPCINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H +#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" @@ -44,6 +44,7 @@ public: raw_ostream &O, const char *Modifier = nullptr); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 12584be..c54d5e7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -9,7 +9,9 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" @@ -128,6 +130,30 @@ public: } } + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout 
&Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override { + switch ((PPC::Fixups)Fixup.getKind()) { + default: break; + case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: + // If the target symbol has a local entry point we must not attempt + // to resolve the fixup directly. Emit a relocation and leave + // resolution of the final target address to the linker. + if (const MCSymbolRefExpr *A = Target.getSymA()) { + const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol()); + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. + unsigned Other = MCELF::getOther(Data) << 2; + if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) + IsResolved = false; + } + break; + } + } + bool mayNeedRelaxation(const MCInst &Inst) const override { // FIXME. return false; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index cd3b4f4..b817394 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" @@ -23,13 +24,12 @@ namespace { public: PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI); - virtual ~PPCELFObjectWriter(); protected: - virtual unsigned getRelocTypeInner(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; + + bool needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const override; }; } @@ -38,9 +38,6 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t 
OSABI) Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, /*HasRelocationAddend*/ true) {} -PPCELFObjectWriter::~PPCELFObjectWriter() { -} - static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target, const MCFixup &Fixup) { const MCExpr *Expr = Fixup.getValue(); @@ -69,10 +66,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target, llvm_unreachable("unknown PPCMCExpr kind"); } -unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const -{ +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup); // determine the type of the relocation @@ -83,7 +79,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: - Type = ELF::R_PPC_REL24; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_REL24; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_PPC_PLTREL24; + break; + case MCSymbolRefExpr::VK_PPC_LOCAL: + Type = ELF::R_PPC_LOCAL24PC; + break; + } break; case PPC::fixup_ppc_brcond14: case PPC::fixup_ppc_brcond14abs: @@ -224,7 +231,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_DTPREL16_HIGHESTA; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: - Type = ELF::R_PPC64_GOT_TLSGD16; + if (is64Bit()) + Type = ELF::R_PPC64_GOT_TLSGD16; + else + Type = ELF::R_PPC_GOT_TLSGD16; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: Type = ELF::R_PPC64_GOT_TLSGD16_LO; @@ -236,7 +246,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_GOT_TLSGD16_HA; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: - Type = ELF::R_PPC64_GOT_TLSLD16; + if (is64Bit()) + Type = ELF::R_PPC64_GOT_TLSLD16; + else + Type = 
ELF::R_PPC_GOT_TLSLD16; break; case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: Type = ELF::R_PPC64_GOT_TLSLD16_LO; @@ -332,13 +345,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TLSGD: - Type = ELF::R_PPC64_TLSGD; + if (is64Bit()) + Type = ELF::R_PPC64_TLSGD; + else + Type = ELF::R_PPC_TLSGD; break; case MCSymbolRefExpr::VK_PPC_TLSLD: - Type = ELF::R_PPC64_TLSLD; + if (is64Bit()) + Type = ELF::R_PPC64_TLSLD; + else + Type = ELF::R_PPC_TLSLD; break; case MCSymbolRefExpr::VK_PPC_TLS: - Type = ELF::R_PPC64_TLS; + if (is64Bit()) + Type = ELF::R_PPC64_TLS; + else + Type = ELF::R_PPC_TLS; break; } break; @@ -373,10 +395,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, return Type; } -unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - return getRelocTypeInner(Target, Fixup, IsPCRel); +bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const { + switch (Type) { + default: + return false; + + case ELF::R_PPC_REL24: + // If the target symbol has a local entry point, we must keep the + // target symbol to preserve that information for the linker. + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. 
+ unsigned Other = MCELF::getOther(SD) << 2; + return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0; + } } MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 68de8c1..ae43e59 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_PPC_PPCFIXUPKINDS_H -#define LLVM_PPC_PPCFIXUPKINDS_H +#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b95a2ac..893aae3 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -42,9 +42,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { UseIntegratedAssembler = true; } -void PPCLinuxMCAsmInfo::anchor() { } +void PPCELFMCAsmInfo::anchor() { } -PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { +PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } @@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { DollarIsPC = true; // Set up DWARF directives - HasLEB128 = true; // Target asm supports leb128 directives (little-endian) MinInstAlignment = 4; // Exceptions handling @@ -73,6 +72,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr; AssemblerDialect = 1; // New-Style mnemonics. 
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; if (T.getOS() == llvm::Triple::FreeBSD || (T.getOS() == llvm::Triple::NetBSD && !is64Bit) || diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 754330b..9f0294d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCTARGETASMINFO_H -#define PPCTARGETASMINFO_H +#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" @@ -26,10 +26,10 @@ class Triple; explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; - class PPCLinuxMCAsmInfo : public MCAsmInfoELF { + class PPCELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&); + explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&); }; } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 435a93f..786b7fe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -66,6 +66,15 @@ public: unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, 
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; @@ -260,6 +269,54 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, } +unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) + const { + // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8) + // as the displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5; + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isImm()); + uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3; + return reverseBits(Imm | RegBits) >> 22; +} + + +unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) + const { + // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4) + // as the displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5; + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isImm()); + uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2; + return reverseBits(Imm | RegBits) >> 22; +} + + +unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) + const { + // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2) + // as the displacement and the next 5 bits as the register #. 
+ assert(MI.getOperand(OpNo+1).isReg()); + uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5; + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isImm()); + uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1; + return reverseBits(Imm | RegBits) >> 22; +} + + unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 3ac0aca..7204bef 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "PPCFixupKinds.h" #include "PPCMCExpr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -52,40 +53,56 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { } bool +PPCMCExpr::EvaluateAsConstant(int64_t &Res) const { + MCValue Value; + + if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr)) + return false; + + if (!Value.isAbsolute()) + return false; + + Res = EvaluateAsInt64(Value.getConstant()); + return true; +} + +int64_t +PPCMCExpr::EvaluateAsInt64(int64_t Value) const { + switch (Kind) { + case VK_PPC_LO: + return Value & 0xffff; + case VK_PPC_HI: + return (Value >> 16) & 0xffff; + case VK_PPC_HA: + return ((Value + 0x8000) >> 16) & 0xffff; + case VK_PPC_HIGHER: + return (Value >> 32) & 0xffff; + case VK_PPC_HIGHERA: + return ((Value + 0x8000) >> 32) & 0xffff; + case VK_PPC_HIGHEST: + return (Value >> 48) & 0xffff; + case VK_PPC_HIGHESTA: + return ((Value + 0x8000) >> 48) & 0xffff; + case VK_PPC_None: + break; + } + llvm_unreachable("Invalid kind!"); +} + +bool PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { MCValue Value; - if 
(!getSubExpr()->EvaluateAsRelocatable(Value, Layout)) + if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup)) return false; if (Value.isAbsolute()) { - int64_t Result = Value.getConstant(); - switch (Kind) { - default: - llvm_unreachable("Invalid kind!"); - case VK_PPC_LO: - Result = Result & 0xffff; - break; - case VK_PPC_HI: - Result = (Result >> 16) & 0xffff; - break; - case VK_PPC_HA: - Result = ((Result + 0x8000) >> 16) & 0xffff; - break; - case VK_PPC_HIGHER: - Result = (Result >> 32) & 0xffff; - break; - case VK_PPC_HIGHERA: - Result = ((Result + 0x8000) >> 32) & 0xffff; - break; - case VK_PPC_HIGHEST: - Result = (Result >> 48) & 0xffff; - break; - case VK_PPC_HIGHESTA: - Result = ((Result + 0x8000) >> 48) & 0xffff; - break; - } + int64_t Result = EvaluateAsInt64(Value.getConstant()); + if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) && + (Result >= 0x8000)) + return false; Res = MCValue::get(Result); } else { if (!Layout) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index bca4085..f0a6bb9 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCMCEXPR_H -#define PPCMCEXPR_H +#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" @@ -34,6 +34,8 @@ private: const MCExpr *Expr; bool IsDarwin; + int64_t EvaluateAsInt64(int64_t Value) const; + explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr, bool _IsDarwin) : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {} @@ -78,7 +80,8 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; + const MCAsmLayout *Layout, + const MCFixup *Fixup) const 
override; void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); @@ -87,6 +90,8 @@ public: // There are no TLS PPCMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} + bool EvaluateAsConstant(int64_t &Res) const; + static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 7057797..00be8f4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -16,12 +16,16 @@ #include "PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -75,7 +79,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { if (TheTriple.isOSDarwin()) MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else - MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple); + MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple); // Initial state of the frame pointer is R1. unsigned Reg = isPPC64 ? 
PPC::X1 : PPC::R1; @@ -125,11 +129,20 @@ public: void emitMachine(StringRef CPU) override { OS << "\t.machine " << CPU << '\n'; } + void emitAbiVersion(int AbiVersion) override { + OS << "\t.abiversion " << AbiVersion << '\n'; + } + void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n'; + } }; class PPCTargetELFStreamer : public PPCTargetStreamer { public: PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + MCELFStreamer &getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); + } void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation Streamer.EmitSymbolValue(&S, 8); @@ -138,6 +151,39 @@ public: // FIXME: Is there anything to do in here or does this directive only // limit the parser? } + void emitAbiVersion(int AbiVersion) override { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags &= ~ELF::EF_PPC64_ABI; + Flags |= (AbiVersion & ELF::EF_PPC64_ABI); + MCA.setELFHeaderEFlags(Flags); + } + void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + MCAssembler &MCA = getStreamer().getAssembler(); + MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S); + + int64_t Res; + if (!LocalOffset->EvaluateAsAbsolute(Res, MCA)) + report_fatal_error(".localentry expression must be absolute."); + + unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); + if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) + report_fatal_error(".localentry expression cannot be encoded."); + + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. 
+ unsigned Other = MCELF::getOther(Data) << 2; + Other &= ~ELF::STO_PPC64_LOCAL_MASK; + Other |= Encoded; + MCELF::setOther(Data, Other >> 2); + + // For GAS compatibility, unless we already saw a .abiversion directive, + // set e_flags to indicate ELFv2 ABI. + unsigned Flags = MCA.getELFHeaderEFlags(); + if ((Flags & ELF::EF_PPC64_ABI) == 0) + MCA.setELFHeaderEFlags(Flags | 2); + } }; class PPCTargetMachOStreamer : public PPCTargetStreamer { @@ -150,25 +196,27 @@ public: // FIXME: We should update the CPUType, CPUSubType in the Object file if // the new values are different from the defaults. } + void emitAbiVersion(int AbiVersion) override { + llvm_unreachable("Unknown pseudo-op: .abiversion"); + } + void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + llvm_unreachable("Unknown pseudo-op: .localentry"); + } }; } // This is duplicated code. Refactor this. static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll) { if (Triple(TT).isOSDarwin()) { MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); new PPCTargetMachOStreamer(*S); return S; } - MCStreamer *S = - createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); new PPCTargetELFStreamer(*S); return S; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 474395b..68f7f7a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCMCTARGETDESC_H -#define PPCMCTARGETDESC_H +#ifndef 
LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index cff27ba..df2f14a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -80,7 +80,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } /// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum. -/// Outline based on PPCELFObjectWriter::getRelocTypeInner(). +/// Outline based on PPCELFObjectWriter::GetRelocType(). static unsigned getRelocType(const MCValue &Target, const MCFixupKind FixupKind, // from // Fixup.getKind() @@ -360,7 +360,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) + if (!SD->getSymbol().isUndefined()) FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 10e328a..6075631 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H -#define LLVM_TARGET_POWERPC_PPCPREDICATES_H +#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H +#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index c966748..cf516f4 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -13,7 +13,7 @@ TARGET = PPC # Make sure that tblgen is run, first thing. BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ - PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ + PPCGenAsmWriter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index c42c5be..8fb33df 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_POWERPC_H -#define LLVM_TARGET_POWERPC_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H +#define LLVM_LIB_TARGET_POWERPC_PPC_H #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> @@ -26,7 +26,6 @@ namespace llvm { class PassRegistry; class FunctionPass; class ImmutablePass; - class JITCodeEmitter; class MachineInstr; class AsmPrinter; class MCInst; @@ -41,8 +40,6 @@ namespace llvm { FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); - FunctionPass 
*createPPCJITCodeEmitterPass(PPCTargetMachine &TM, - JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); @@ -60,10 +57,11 @@ namespace llvm { // PPC Specific MachineOperand flags. MO_NO_FLAG, - /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$stub" symbol. This is used for calls - /// and jumps to external functions on Tiger and earlier. - MO_DARWIN_STUB = 1, + /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is + /// used for calls and jumps to external functions on Tiger and earlier, and + /// for PIC calls on Linux and ELF systems. + MO_PLT_OR_STUB = 1, /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). @@ -95,7 +93,12 @@ namespace llvm { MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4 + MO_TLS = 8 << 4, + + // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr + // call sequences. 
+ MO_TLSLD = 9 << 4, + MO_TLSGD = 10 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index a9842b2..46d56a4 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -56,6 +56,8 @@ def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", "Use condition-register bits individually">; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions">; +def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", + "Enable SPE instructions">; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", @@ -88,11 +90,23 @@ def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; +def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true", + "Has only the msync instruction instead of sync", + [FeatureBookE]>; +def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true", + "Enable E500/E500mc instructions">; +def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true", + "Enable PPC 4xx instructions">; +def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true", + "Enable PPC 6xx instructions">; def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", "Enable VSX instructions", [FeatureAltivec]>; +def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", + "Enable POWER8 vector instructions", + [FeatureVSX, FeatureAltivec]>; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -105,7 +119,16 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", // CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions // POPCNTB p5 
through p7 popcntb and related instructions -// VSX p7 vector-scalar instruction set + +//===----------------------------------------------------------------------===// +// ABI Selection // +//===----------------------------------------------------------------------===// + +def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1", + "Use the ELFv1 ABI">; + +def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2", + "Use the ELFv2 ABI">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -178,10 +201,12 @@ include "PPCInstrInfo.td" def : Processor<"generic", G3Itineraries, [Directive32]>; def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; + FeatureBookE, FeatureMSYNC, + DeprecatedMFTB]>; def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; + FeatureBookE, FeatureMSYNC, + DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index fd044d9..5648873 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -18,6 +18,7 @@ #include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" +#include "PPCMachineFunctionInfo.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCSubtarget.h" @@ -27,10 +28,12 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" 
+#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -100,9 +103,11 @@ namespace { } bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; void EmitFunctionEntryLabel() override; + void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; }; @@ -142,7 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) { void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -270,6 +275,18 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, printOperand(MI, OpNo, O); return false; } + case 'U': // Print 'u' for update form. + case 'X': // Print 'x' for indexed form. + { + // FIXME: Currently for PowerPC memory operands are always loaded + // into a register, so we never get an update or indexed form. + // This is bad even for offset forms, since even if we know we + // have a value in -16(r1), we will generate a load into r<n> + // and then load from 0(r<n>). Until that issue is fixed, + // tolerate 'U' and 'X' but don't output anything. + assert(MI->getOperand(OpNo).isReg()); + return false; + } } } @@ -285,7 +302,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; // To avoid name clash check if the name already exists. 
@@ -306,12 +323,35 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; bool isPPC64 = Subtarget.isPPC64(); + bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin(); + const Module *M = MF->getFunction()->getParent(); + PICLevel::Level PL = M->getPICLevel(); // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); + case PPC::MoveGOTtoLR: { + // Transform %LR = MoveGOTtoLR + // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 + // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding + // _GLOBAL_OFFSET_TABLE_) has exactly one instruction: + // blrl + // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local + MCSymbol *GOTSymbol = + OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *OffsExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, + MCSymbolRefExpr::VK_PPC_LOCAL, + OutContext), + MCConstantExpr::Create(4, OutContext), + OutContext); + + // Emit the 'bl'. 
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr)); + return; + } case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { // Transform %LR = MovePCtoLR @@ -330,11 +370,85 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::UpdateGBR: { + // Transform %Rd = UpdateGBR(%Rt, %Ri) + // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri) + // add %Rd, %Rt, %Ri + // Get the offset from the GOT Base Register to the GOT + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + MCSymbol *PICOffset = + MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); + TmpInst.setOpcode(PPC::LWZ); + const MCExpr *Exp = + MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); + const MCExpr *PB = + MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), + MCSymbolRefExpr::VK_None, + OutContext); + const MCOperand TR = TmpInst.getOperand(1); + const MCOperand PICR = TmpInst.getOperand(0); + + // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri) + TmpInst.getOperand(1) = + MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext)); + TmpInst.getOperand(0) = TR; + TmpInst.getOperand(2) = PICR; + EmitToStreamer(OutStreamer, TmpInst); + + TmpInst.setOpcode(PPC::ADD4); + TmpInst.getOperand(0) = PICR; + TmpInst.getOperand(1) = TR; + TmpInst.getOperand(2) = PICR; + EmitToStreamer(OutStreamer, TmpInst); + return; + } + case PPC::LWZtoc: { + // Transform %R3 = LWZtoc <ga:@min1>, %R2 + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + + // Change the opcode to LWZ, and the global address operand to be a + // reference to the GOT entry we will synthesize later. 
+ TmpInst.setOpcode(PPC::LWZ); + const MachineOperand &MO = MI->getOperand(1); + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); + MCSymbol *MOSymbol = nullptr; + if (MO.isGlobal()) + MOSymbol = getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + else if (MO.isBlockAddress()) + MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + + if (PL == PICLevel::Small) { + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT, + OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + } else { + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None, + OutContext); + const MCExpr *PB = + MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")), + OutContext); + Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + } + EmitToStreamer(OutStreamer, TmpInst); + return; + } case PPC::LDtocJTI: case PPC::LDtocCPT: + case PPC::LDtocBA: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. 
@@ -342,7 +456,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(1); // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); MCSymbol *MOSymbol = nullptr; if (MO.isGlobal()) MOSymbol = getSymbol(MO.getGlobal()); @@ -350,6 +464,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); + else if (MO.isBlockAddress()) + MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); @@ -363,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, <ga:@sym> - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDIS8. If the global address is external, has // common linkage, is a non-local function address, or is a jump table @@ -371,7 +487,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // reference the symbol directly. 
TmpInst.setOpcode(PPC::ADDIS8); const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) && + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || + MO.isBlockAddress()) && "Invalid operand for ADDIStocHA!"); MCSymbol *MOSymbol = nullptr; bool IsExternal = false; @@ -391,9 +508,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); + else if (MO.isBlockAddress()) + MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt || - MO.isJTI() || TM.getCodeModel() == CodeModel::Large) + MO.isJTI() || MO.isBlockAddress() || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -405,19 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDtocL: { // Transform %Xd = LDtocL <ga:@sym>, %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the // associated TOC entry. Otherwise reference the symbol directly. 
TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || + MO.isBlockAddress()) && "Invalid operand for LDtocL!"); MCSymbol *MOSymbol = nullptr; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); + else if (MO.isBlockAddress()) { + MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); if (TM.getCodeModel() == CodeModel::Large) @@ -442,7 +567,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItocL: { // Transform %Xd = ADDItocL %Xs, <ga:@sym> - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the @@ -493,7 +618,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ); @@ -508,6 +633,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + case PPC::PPC32PICGOT: { + MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + MCSymbol *GOTRef = OutContext.CreateTempSymbol(); + MCSymbol *NextInstr = OutContext.CreateTempSymbol(); + + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL) + // FIXME: We would like an efficient form for this, so we don't have to do + // a lot of extra uniquing. 
+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext))); + const MCExpr *OffsExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext), + MCSymbolRefExpr::Create(GOTRef, OutContext), + OutContext); + OutStreamer.EmitLabel(GOTRef); + OutStreamer.EmitValue(OffsExpr, 4); + OutStreamer.EmitLabel(NextInstr); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR) + .addReg(MI->getOperand(0).getReg())); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ) + .addReg(MI->getOperand(1).getReg()) + .addImm(0) + .addReg(MI->getOperand(0).getReg())); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(MI->getOperand(0).getReg())); + return; + } case PPC::PPC32GOT: { MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *SymGotTlsL = @@ -541,40 +694,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymGotTlsGD)); return; } - case PPC::ADDItlsgdL: { + case PPC::ADDItlsgdL: // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDItlsgdL32: { + // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@got@tlsgd const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, - OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymGotTlsGD)); - return; - } - case PPC::GETtlsADDR: { - // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); - - StringRef Name = "__tls_get_addr"; - MCSymbol 
*TlsGetAddr = OutContext.GetOrCreateSymbol(Name); - const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? + MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO : + MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); return; } case PPC::ADDIStlsldHA: { @@ -593,72 +731,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymGotTlsLD)); return; } - case PPC::ADDItlsldL: { + case PPC::ADDItlsldL: // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDItlsldL32: { + // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@got@tlsld const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, - OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymGotTlsLD)); - return; - } - case PPC::GETtlsldADDR: { - // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); - - StringRef Name = "__tls_get_addr"; - MCSymbol *TlsGetAddr = 
OutContext.GetOrCreateSymbol(Name); - const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? + MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO : + MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) - .addExpr(TlsRef) - .addExpr(SymVar)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); return; } - case PPC::ADDISdtprelHA: { + case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDISdtprelHA32: { + // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym> + // Into: %Rd = ADDIS %R3, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) - .addReg(MI->getOperand(0).getReg()) - .addReg(PPC::X3) - .addExpr(SymDtprel)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) + .addReg(MI->getOperand(0).getReg()) + .addReg(Subtarget.isPPC64() ? 
PPC::X3 : PPC::R3) + .addExpr(SymDtprel)); return; } - case PPC::ADDIdtprelL: { + case PPC::ADDIdtprelL: // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> // Into: %Xd = ADDI8 %Xs, sym@dtprel@l - assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + case PPC::ADDIdtprelL32: { + // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym> + // Into: %Rd = ADDI %Rs, sym@dtprel@l const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(SymDtprel)); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); return; } case PPC::MFOCRF: @@ -713,14 +842,77 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); EmitToStreamer(OutStreamer, TmpInst); } +void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (Subtarget.isELFv2ABI()) { + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitAbiVersion(2); + } + + if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_) + return AsmPrinter::EmitStartOfAsmFile(M); + + if (M.getPICLevel() == PICLevel::Small) + return AsmPrinter::EmitStartOfAsmFile(M); + + OutStreamer.SwitchSection(OutContext.getELFSection(".got2", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly())); + + MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC")); + MCSymbol *CurrentPos = OutContext.CreateTempSymbol(); + + 
OutStreamer.EmitLabel(CurrentPos); + + // The GOT pointer points to the middle of the GOT, in order to reference the + // entire 64kB range. 0x8000 is the midpoint. + const MCExpr *tocExpr = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext), + MCConstantExpr::Create(0x8000, OutContext), + OutContext); + + OutStreamer.EmitAssignment(TOCSym, tocExpr); + + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); +} + void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { - if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label. + // linux/ppc32 - Normal entry label. + if (!Subtarget.isPPC64() && + (TM.getRelocationModel() != Reloc::PIC_ || + MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small)) return AsmPrinter::EmitFunctionEntryLabel(); - + + if (!Subtarget.isPPC64()) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); + if (PPCFI->usesPICBase()) { + MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(); + MCSymbol *PICBase = MF->getPICBaseSymbol(); + OutStreamer.EmitLabel(RelocSymbol); + + const MCExpr *OffsExpr = + MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")), + OutContext), + MCSymbolRefExpr::Create(PICBase, OutContext), + OutContext); + OutStreamer.EmitValue(OffsExpr, 4); + OutStreamer.EmitLabel(CurrentFnSym); + return; + } else + return AsmPrinter::EmitFunctionEntryLabel(); + } + + // ELFv2 ABI - Normal entry label. + if (Subtarget.isELFv2ABI()) + return AsmPrinter::EmitFunctionEntryLabel(); + // Emit an official procedure descriptor. 
MCSectionSubPair Current = OutStreamer.getCurrentSection(); const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", @@ -752,15 +944,22 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); bool isPPC64 = TD->getPointerSizeInBits() == 64; PPCTargetStreamer &TS = static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer()); - if (isPPC64 && !TOC.empty()) { - const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", + if (!TOC.empty()) { + const MCSectionELF *Section; + + if (isPPC64) + Section = OutStreamer.getContext().getELFSection(".toc", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + else + Section = OutStreamer.getContext().getELFSection(".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getReadOnly()); OutStreamer.SwitchSection(Section); @@ -768,8 +967,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); - MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); - TS.emitTCEntry(*S); + MCSymbol *S = I->first; + if (isPPC64) + TS.emitTCEntry(*S); + else + OutStreamer.EmitSymbolValue(S, 4); } } @@ -795,6 +997,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2. +void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { + // In the ELFv2 ABI, in functions that use the TOC register, we need to + // provide two entry points. 
The ABI guarantees that when calling the + // local entry point, r2 is set up by the caller to contain the TOC base + // for this function, and when calling the global entry point, r12 is set + // up by the caller to hold the address of the global entry point. We + // thus emit a prefix sequence along the following lines: + // + // func: + // # global entry point + // addis r2,r12,(.TOC.-func)@ha + // addi r2,r2,(.TOC.-func)@l + // .localentry func, .-func + // # local entry point, followed by function body + // + // This ensures we have r2 set up correctly while executing the function + // body, no matter which entry point is called. + if (Subtarget.isELFv2ABI() + // Only do all that if the function uses r2 in the first place. + && !MF->getRegInfo().use_empty(PPC::X2)) { + + MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(GlobalEntryLabel); + const MCSymbolRefExpr *GlobalEntryLabelExp = + MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext); + + MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext), + GlobalEntryLabelExp, OutContext); + + const MCExpr *TOCDeltaHi = + PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::X2) + .addReg(PPC::X12) + .addExpr(TOCDeltaHi)); + + const MCExpr *TOCDeltaLo = + PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addExpr(TOCDeltaLo)); + + MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(LocalEntryLabel); + const MCSymbolRefExpr *LocalEntryLabelExp = + MCSymbolRefExpr::Create(LocalEntryLabel, OutContext); + const MCExpr *LocalOffsetExp = + MCBinaryExpr::CreateSub(LocalEntryLabelExp, + GlobalEntryLabelExp, OutContext); + + PPCTargetStreamer *TS = + 
static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp); + } +} + /// EmitFunctionBodyEnd - Print the traceback table before the .size /// directive. /// @@ -886,7 +1150,8 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = + TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64; bool isDarwin = Subtarget.isDarwin(); const TargetLoweringObjectFileMachO &TLOFMacho = @@ -1022,7 +1287,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = + TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index ee90671..41594be 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "ppc-branch-select" @@ -64,7 +65,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { const PPCInstrInfo *TII = - static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); + static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. 
Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index ec1e34d..5f3b176 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -214,7 +214,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (!TM) return true; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. @@ -384,10 +384,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { if (!TM) return true; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (TLI->supportJumpTables() && - SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) + if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } } diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index d48164d..cf8fee4 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -14,9 +14,15 @@ /// CCIfSubtarget - Match if the current subtarget has a feature F. 
class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + : CCIf<!strconcat("static_cast<const PPCSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", + F), + A>; class CCIfNotSubtarget<string F, CCAction A> - : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + : CCIf<!strconcat("!static_cast<const PPCSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", + F), + A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention @@ -31,13 +37,18 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + + // Floating point types returned as "direct" go into F1 .. F8; note that + // only the ELFv2 ABI fully utilizes all these registers. + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - CCIfType<[f32], CCAssignToReg<[F1, F2]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, - - // Vector types are always returned in V2. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>, - CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>> + // Vector types returned as "direct" go into V2 .. V9; note that only the + // ELFv2 ABI fully utilizes all these registers. 
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; @@ -69,10 +80,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i32], CCPromoteToType<i64>>, CCIfType<[i64], CCAssignToReg<[X3, X4]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1, F2]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>, - CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>> + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp deleted file mode 100644 index 0875523..0000000 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ /dev/null @@ -1,293 +0,0 @@ -//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to -// JIT-compile bitcode to native PowerPC. 
-// -//===----------------------------------------------------------------------===// - -#include "PPC.h" -#include "PPCRelocations.h" -#include "PPCTargetMachine.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/PassManager.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" -using namespace llvm; - -namespace { - class PPCCodeEmitter : public MachineFunctionPass { - TargetMachine &TM; - JITCodeEmitter &MCE; - MachineModuleInfo *MMI; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - static char ID; - - /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record - /// its address in the function into this pointer. - void *MovePCtoLROffset; - public: - - PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), TM(tm), MCE(mce) {} - - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. 
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - - - MachineRelocation GetRelocation(const MachineOperand &MO, - unsigned RelocID) const; - - /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; - - unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getAbsDirectBrEncoding(const MachineInstr &MI, - unsigned OpNo) const; - unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; - - unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; - - const char *getPassName() const override { - return "PowerPC Machine Code Emitter"; - } - - /// runOnMachineFunction - emits the given MachineFunction to memory - /// - bool runOnMachineFunction(MachineFunction &MF) override; - - /// emitBasicBlock - emits the given MachineBasicBlock to memory - /// - void emitBasicBlock(MachineBasicBlock &MBB); - }; -} - -char PPCCodeEmitter::ID = 0; - -/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code -/// to the specified MCE object. 
-FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, - JITCodeEmitter &JCE) { - return new PPCCodeEmitter(TM, JCE); -} - -bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - assert((MF.getTarget().getRelocationModel() != Reloc::Default || - MF.getTarget().getRelocationModel() != Reloc::Static) && - "JIT relocation model must be set to static or default!"); - - MMI = &getAnalysis<MachineModuleInfo>(); - MCE.setModuleInfo(MMI); - do { - MovePCtoLROffset = nullptr; - MCE.startFunction(MF); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - emitBasicBlock(*BB); - } while (MCE.finishFunction(MF)); - - return false; -} - -void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { - MCE.StartMachineBasicBlock(&MBB); - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ - const MachineInstr &MI = *I; - MCE.processDebugLoc(MI.getDebugLoc(), true); - switch (MI.getOpcode()) { - default: - MCE.emitWordBE(getBinaryCodeForInstr(MI)); - break; - case TargetOpcode::CFI_INSTRUCTION: - break; - case TargetOpcode::EH_LABEL: - MCE.emitLabel(MI.getOperand(0).getMCSymbol()); - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - break; // pseudo opcode, no side effects - case PPC::MovePCtoLR: - case PPC::MovePCtoLR8: - assert(TM.getRelocationModel() == Reloc::PIC_); - MovePCtoLROffset = (void*)MCE.getCurrentPCValue(); - MCE.emitWordBE(0x48000005); // bl 1 - break; - } - MCE.processDebugLoc(MI.getDebugLoc(), false); - } -} - -unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || - MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && - (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); -} - -MachineRelocation 
PPCCodeEmitter::GetRelocation(const MachineOperand &MO, - unsigned RelocID) const { - // If in PIC mode, we need to encode the negated address of the - // 'movepctolr' into the unrelocated field. After relocation, we'll have - // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm - // field, we get &gv. This doesn't happen for branch relocations, which are - // always implicitly pc relative. - intptr_t Cst = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); - Cst = -(intptr_t)MovePCtoLROffset - 4; - } - - if (MO.isGlobal()) - return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID, - const_cast<GlobalValue *>(MO.getGlobal()), - Cst, isa<Function>(MO.getGlobal())); - if (MO.isSymbol()) - return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - RelocID, MO.getSymbolName(), Cst); - if (MO.isCPI()) - return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - RelocID, MO.getIndex(), Cst); - - if (MO.isMBB()) - return MachineRelocation::getBB(MCE.getCurrentPCOffset(), - RelocID, MO.getMBB()); - - assert(MO.isJTI()); - return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), - RelocID, MO.getIndex(), Cst); -} - -unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); - - MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx)); - return 0; -} - -unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); - return 0; -} - -unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); - - llvm_unreachable("Absolute 
branch relocations unsupported on the old JIT."); -} - -unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI, - unsigned OpNo) const { - llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); -} - -unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI, - unsigned OpNo) const { - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); - - unsigned RelocID; - switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) { - default: llvm_unreachable("Unsupported target operand flags!"); - case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break; - case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break; - } - - MCE.addRelocation(GetRelocation(MO, RelocID)); - return 0; -} - -unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI, - unsigned OpNo) const { - // Encode (imm, reg) as a memri, which has the low 16-bits as the - // displacement and the next 5 bits as the register #. - assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16; - - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isImm()) - return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits; - - // Add a fixup for the displacement field. - MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); - return RegBits; -} - -unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, - unsigned OpNo) const { - // Encode (imm, reg) as a memrix, which has the low 14-bits as the - // displacement and the next 5 bits as the register #. 
- assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14; - - const MachineOperand &MO = MI.getOperand(OpNo); - if (MO.isImm()) - return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits; - - MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); - return RegBits; -} - - -unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, - unsigned OpNo) const { - llvm_unreachable("TLS not supported on the old JIT."); - return 0; -} - -unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI, - unsigned OpNo) const { - llvm_unreachable("TLS not supported on the old JIT."); - return 0; -} - -unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { - - if (MO.isReg()) { - // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. - // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && - MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || - MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); - } - - assert(MO.isImm() && - "Relocation required in an instruction that we cannot encode!"); - return MO.getImm(); -} - -#include "PPCGenCodeEmitter.inc" diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 92a0ec1..1149354 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -39,7 +39,7 @@ //===----------------------------------------------------------------------===// // // TBD: -// FastLowerArguments: Handle simple cases. +// fastLowerArguments: Handle simple cases. // PPCMaterializeGV: Handle TLS. // SelectCall: Handle function pointers. // SelectCall: Handle multi-register return values. 
@@ -92,30 +92,29 @@ class PPCFastISel final : public FastISel { public: explicit PPCFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) - : FastISel(FuncInfo, LibInfo), - TM(FuncInfo.MF->getTarget()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()), - PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()), - Context(&FuncInfo.Fn->getContext()) { } + : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()), + TII(*TM.getSubtargetImpl()->getInstrInfo()), + TLI(*TM.getSubtargetImpl()->getTargetLowering()), + PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()), + Context(&FuncInfo.Fn->getContext()) {} // Backend specific FastISel code. private: - bool TargetSelectInstruction(const Instruction *I) override; - unsigned TargetMaterializeConstant(const Constant *C) override; - unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; - bool FastLowerArguments() override; - unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; - unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + bool fastLowerArguments() override; + unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned FastEmitInst_r(unsigned MachineInstOpcode, + unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); @@ -153,7 +152,7 @@ class PPCFastISel final : public FastISel 
{ unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); - unsigned PPCMaterializeInt(const Constant *C, MVT VT); + unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, @@ -560,7 +559,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -707,7 +706,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -715,7 +714,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DbgLoc); + fastEmitBranch(Target, DbgLoc); return true; } @@ -838,7 +837,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) { return false; // No code is generated for a FP extend. - UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -860,12 +859,12 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) .addReg(SrcReg); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } // Move an i32 or i64 value in a GPR to an f64 value in an FPR. -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. 
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte @@ -898,10 +897,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, if (SrcVT == MVT::i32) { if (!IsSigned) { LoadOpc = PPC::LFIWZX; - Addr.Offset = 4; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; - Addr.Offset = 4; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; } } @@ -979,13 +978,13 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(FPReg); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } // Move the floating-point value in SrcReg into an integer destination // register, and return the register (or zero if we can't handle it). -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. 
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, @@ -1080,7 +1079,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IntReg == 0) return false; - UpdateValueMap(I, IntReg); + updateValueMap(I, IntReg); return true; } @@ -1169,7 +1168,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { ResultReg) .addReg(SrcReg1) .addImm(Imm); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } } @@ -1185,7 +1184,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1200,10 +1199,12 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, unsigned &NumBytes, bool IsVarArg) { SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); // Reserve space for the linkage area on the stack. - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); + bool isELFv2ABI = PPCSubTarget->isELFv2ABI(); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); CCInfo.AllocateStack(LinkageSize, 8); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); @@ -1232,6 +1233,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. NumBytes = std::max(NumBytes, LinkageSize + 64); // Issue CALLSEQ_START. 
@@ -1318,7 +1320,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); @@ -1364,7 +1366,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, assert(ResultReg && "ResultReg unset!"); UsedRegs.push_back(SourcePhysReg); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } } @@ -1408,7 +1410,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && RetVT != MVT::f64) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); if (RVLocs.size() > 1) return false; @@ -1498,6 +1500,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) { for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (PPCSubTarget->isELFv2ABI()) + MIB.addReg(PPC::X2, RegState::Implicit); + // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); @@ -1531,7 +1537,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context); CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); const Value *RV = Ret->getOperand(0); @@ -1541,13 +1547,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) { // Special case for returning a constant integer of any size. // Materialize the constant as an i64 and copy it to the return - // register. This avoids an unnecessary extend or truncate. + // register. We still need to worry about properly extending the sign. E.g: + // If the constant has only one bit, it means it is a boolean. Therefore + // we can't use PPCMaterializeInt because it extends the sign which will + // cause negations of the returned value to be incorrect as they are + // implemented as the flip of the least significant bit. if (isa<ConstantInt>(*RV)) { const Constant *C = cast<Constant>(RV); - unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); - unsigned RetReg = ValLocs[0].getLocReg(); + + CCValAssign &VA = ValLocs[0]; + + unsigned RetReg = VA.getLocReg(); + unsigned SrcReg = PPCMaterializeInt(C, MVT::i64, + VA.getLocInfo() == CCValAssign::SExt); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); + RetRegs.push_back(RetReg); } else { @@ -1714,7 +1730,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) { SrcReg = ResultReg; } - UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -1753,13 +1769,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // Attempt to fast-select an instruction that wasn't handled by // the table-generated machinery. 
-bool PPCFastISel::TargetSelectInstruction(const Instruction *I) { +bool PPCFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: @@ -2007,7 +2023,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). -unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { +unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, + bool UseSExt) { // If we're using CR bit registers for i1 values, handle that as a special // case first. if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { @@ -2031,7 +2048,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm(CI->getSExtValue()); + .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() ); return ImmReg; } @@ -2048,7 +2065,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). -unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { +unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -2067,7 +2084,7 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) { // Materialize the address created by an alloca into a register, and // return the register number (or zero if we failed to handle it). -unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. 
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -2167,7 +2184,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // Attempt to lower call arguments in a faster way than done by // the selection DAG code. -bool PPCFastISel::FastLowerArguments() { +bool PPCFastISel::fastLowerArguments() { // Defer to normal argument lowering for now. It's reasonably // efficient. Consider doing something like ARM to handle the // case where all args fit in registers, no varargs, no float @@ -2177,7 +2194,7 @@ bool PPCFastISel::FastLowerArguments() { // Handle materializing integer constants into a register. This is not // automatically generated for PowerPC, so must be explicitly created here. -unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { +unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; @@ -2214,7 +2231,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { // assigning R0 or X0 to the output register for GPRC and G8RC // register classes, as any such result could be used in ADDI, etc., // where those regs have another meaning. -unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { @@ -2227,27 +2244,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC, + return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Op0IsKill, Imm); } // Override for instructions with one register operand to avoid use of // R0/X0. The automatic infrastructure isn't aware of the context so // we must be conservative. 
-unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill) { const TargetRegisterClass *UseRC = (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); + return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); } // Override for instructions with two register operands to avoid use // of R0/X0. The automatic infrastructure isn't aware of the context // so we must be conservative. -unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, +unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { @@ -2255,7 +2272,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); - return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, + return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, Op1, Op1IsKill); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 65e9cf2..dc87a6c 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -254,7 +254,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // transform this into the appropriate ORI instruction. 
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MachineFunction *MF = MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); unsigned UsedRegMask = 0; @@ -372,7 +372,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need @@ -400,7 +400,8 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Maximum call frame needs to be at least big enough for linkage area. unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(), - Subtarget.isDarwinABI()); + Subtarget.isDarwinABI(), + Subtarget.isELFv2ABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so @@ -459,9 +460,9 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg; + unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; unsigned BP8Reg = HasBP ? 
(unsigned) PPC::X30 : FPReg; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -497,21 +498,23 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isELFv2ABI = Subtarget.isELFv2ABI(); assert((isDarwinABI || isSVR4ABI) && "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); @@ -546,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; unsigned ScratchReg = isPPC64 ? 
PPC::X0 : PPC::R0; @@ -602,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, + isDarwinABI, + isPIC); } } @@ -623,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { "Prologue CR saving supported only in 64-bit mode"); if (!MustSaveCRs.empty()) { // will only occur for PPC64 + // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it could be more + // efficient to use mfocrf to selectively save just those fields. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) @@ -791,8 +799,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for + // the whole CR word. In the ELFv2 ABI, every CR that was + // actually saved gets its own CFI record. + unsigned CRReg = isELFv2ABI? 
Reg : (unsigned) PPC::CR2; unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8)); + nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; @@ -812,9 +824,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI != MBB.end() && "Returning block has no terminator"); const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl; @@ -839,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -849,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; unsigned TempReg = isPPC64 ? 
PPC::X12 : PPC::R12; // another scratch reg @@ -890,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, + isDarwinABI, + isPIC); } } @@ -1054,7 +1069,7 @@ void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -1067,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPSI = FI->getFramePointerSaveIndex(); bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. @@ -1081,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int BPSI = FI->getBasePointerSaveIndex(); if (!BPSI && RegInfo->hasBasePointer(MF)) { - int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI); + int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC); // Allocate the frame index for the base pointer save area. BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); // Save the result. 
@@ -1185,7 +1201,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); int64_t LowerBound = 0; @@ -1220,7 +1236,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } const PPCRegisterInfo *RegInfo = - static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); if (RegInfo->hasBasePointer(MF)) { HasGPSaveArea = true; @@ -1368,7 +1384,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo()); DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; @@ -1430,7 +1446,7 @@ restoreCRs(bool isPPC64, bool is31, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo()); DebugLoc DL; unsigned RestoreOp, MoveReg; @@ -1463,7 +1479,7 @@ void PPCFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (MF.getTarget().Options.GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. 
@@ -1513,7 +1529,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = - *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo()); bool CR2Spilled = false; bool CR3Spilled = false; bool CR4Spilled = false; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 7a226f7..c482588 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC_FRAMEINFO_H -#define POWERPC_FRAMEINFO_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H +#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H #include "PPC.h" #include "llvm/ADT/STLExtras.h" @@ -76,8 +76,8 @@ public: /// getTOCSaveOffset - Return the previous frame offset to save the /// TOC register -- 64-bit SVR4 ABI only. - static unsigned getTOCSaveOffset(void) { - return 40; + static unsigned getTOCSaveOffset(bool isELFv2ABI) { + return isELFv2ABI ? 24 : 40; } /// getFramePointerSaveOffset - Return the previous frame offset to save the @@ -97,19 +97,22 @@ public: /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) { + static unsigned getBasePointerSaveOffset(bool isPPC64, + bool isDarwinABI, + bool isPIC) { if (isDarwinABI) return isPPC64 ? -16U : -8U; // SVR4 ABI: First slot in the general register save area. - return isPPC64 ? -16U : -8U; + return isPPC64 ? -16U : isPIC ? -12U : -8U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. 
/// - static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { + static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI, + bool isELFv2ABI) { if (isDarwinABI || isPPC64) - return 6 * (isPPC64 ? 8 : 4); + return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4); // SVR4 ABI: return 8; diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 23f76c1..4b50214 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCHAZRECS_H -#define PPCHAZRECS_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H +#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H #include "PPCInstrInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" @@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { public: PPCHazardRecognizer970(const ScheduleDAG &DAG); - virtual HazardType getHazardType(SUnit *SU, int Stalls) override; - virtual void EmitInstruction(SUnit *SU) override; - virtual void AdvanceCycle() override; - virtual void Reset() override; + HazardType getHazardType(SUnit *SU, int Stalls) override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void Reset() override; private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. 
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4881b3f..49ba58b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,6 +14,7 @@ #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -26,6 +27,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -56,16 +58,16 @@ namespace { unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) - : SelectionDAGISel(tm), TM(tm), - PPCLowering(TM.getTargetLowering()), - PPCSubTarget(TM.getSubtargetImpl()) { + : SelectionDAGISel(tm), TM(tm), + PPCLowering(TM.getSubtargetImpl()->getTargetLowering()), + PPCSubTarget(TM.getSubtargetImpl()) { initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; - PPCLowering = TM.getTargetLowering(); + PPCLowering = TM.getSubtargetImpl()->getTargetLowering(); PPCSubTarget = TM.getSubtargetImpl(); SelectionDAGISel::runOnMachineFunction(MF); @@ -232,7 +234,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); DebugLoc dl; // Emit the following code into the entry block: @@ -268,16 +270,34 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { /// 
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { - const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + const Module *M = MF->getFunction()->getParent(); DebugLoc dl; if (PPCLowering->getPointerTy() == MVT::i32) { - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); - BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); - BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + if (PPCSubTarget->isTargetELF()) { + GlobalBaseReg = PPC::R30; + if (M->getPICLevel() == PICLevel::Small) { + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + } else { + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + BuildMI(FirstMBB, MBBI, dl, + TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg) + .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); + MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); + } + } else { + GlobalBaseReg = + RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + } } else { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); @@ -650,94 +670,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, // only support the altivec types (v16i8, v8i16, v4i32, and v4f32). 
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC, - bool HasVSX) { - switch (CC) { - case ISD::SETEQ: - case ISD::SETUEQ: - case ISD::SETNE: - case ISD::SETUNE: - if (VecVT == MVT::v16i8) - return PPC::VCMPEQUB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPEQUH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPEQUW; - // v4f32 != v4f32 could be translate to unordered not equal - else if (VecVT == MVT::v4f32) - return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; - else if (VecVT == MVT::v2f64) - return PPC::XVCMPEQDP; - break; - case ISD::SETLT: - case ISD::SETGT: - case ISD::SETLE: - case ISD::SETGE: - if (VecVT == MVT::v16i8) - return PPC::VCMPGTSB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPGTSH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPGTSW; - else if (VecVT == MVT::v4f32) - return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; - else if (VecVT == MVT::v2f64) - return PPC::XVCMPGTDP; - break; - case ISD::SETULT: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULE: - if (VecVT == MVT::v16i8) - return PPC::VCMPGTUB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPGTUH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPGTUW; - break; - case ISD::SETOEQ: - if (VecVT == MVT::v4f32) - return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; - else if (VecVT == MVT::v2f64) - return PPC::XVCMPEQDP; - break; - case ISD::SETOLT: - case ISD::SETOGT: - case ISD::SETOLE: - if (VecVT == MVT::v4f32) - return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; - else if (VecVT == MVT::v2f64) - return PPC::XVCMPGTDP; - break; - case ISD::SETOGE: - if (VecVT == MVT::v4f32) - return HasVSX ? 
PPC::XVCMPGESP : PPC::VCMPGEFP; - else if (VecVT == MVT::v2f64) - return PPC::XVCMPGEDP; - break; - default: - break; - } - llvm_unreachable("Invalid integer vector compare condition"); -} +static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, + bool HasVSX, bool &Swap, bool &Negate) { + Swap = false; + Negate = false; -// getVCmpEQInst: return the equal compare instruction for the specified vector -// type. Since this is for altivec specific code, only support the altivec -// types (v16i8, v8i16, v4i32, and v4f32). -static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) { - switch (VecVT) { - case MVT::v16i8: - return PPC::VCMPEQUB; - case MVT::v8i16: - return PPC::VCMPEQUH; - case MVT::v4i32: - return PPC::VCMPEQUW; - case MVT::v4f32: - return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; - case MVT::v2f64: - return PPC::XVCMPEQDP; - default: - llvm_unreachable("Invalid integer vector compare condition"); + if (VecVT.isFloatingPoint()) { + /* Handle some cases by swapping input operands. */ + switch (CC) { + case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; + case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; + case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; + case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; + case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; + case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; + default: break; + } + /* Handle some cases by negating the result. */ + switch (CC) { + case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; + case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; + case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; + case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; + default: break; + } + /* We have instructions implementing the remaining cases. */ + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return HasVSX ? 
PPC::XVCMPEQSP : PPC::VCMPEQFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPEQDP; + break; + case ISD::SETGT: + case ISD::SETOGT: + if (VecVT == MVT::v4f32) + return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGTDP; + break; + case ISD::SETGE: + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGEDP; + break; + default: + break; + } + llvm_unreachable("Invalid floating-point vector compare condition"); + } else { + /* Handle some cases by swapping input operands. */ + switch (CC) { + case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; + case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; + case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; + case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; + default: break; + } + /* Handle some cases by negating the result. */ + switch (CC) { + case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; + case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; + case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; + case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; + default: break; + } + /* We have instructions implementing the remaining cases. 
*/ + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + break; + case ISD::SETGT: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + break; + case ISD::SETUGT: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); } } @@ -829,60 +860,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { EVT VecVT = LHS.getValueType(); - MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; - unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX()); - - switch (CC) { - case ISD::SETEQ: - case ISD::SETOEQ: - case ISD::SETUEQ: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - case ISD::SETNE: - case ISD::SETONE: - case ISD::SETUNE: { - SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : - PPC::VNOR, - VecVT, VCmp, VCmp); - } - case ISD::SETLT: - case ISD::SETOLT: - case ISD::SETULT: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); - case ISD::SETGT: - case ISD::SETOGT: - case ISD::SETUGT: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - case ISD::SETGE: - case ISD::SETOGE: - case ISD::SETUGE: { - // Small optimization: Altivec provides a 'Vector Compare Greater Than - // or Equal To' instruction (vcmpgefp), so in this case there is no - // need for extra logic for the equal compare. 
- if (VecVT.getSimpleVT().isFloatingPoint()) { - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - } else { - SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); - SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR : - PPC::VOR, - VecVT, VCmpGT, VCmpEQ); - } - } - case ISD::SETLE: - case ISD::SETOLE: - case ISD::SETULE: { - SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); - SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR : - PPC::VOR, - VecVT, VCmpLE, VCmpEQ); - } - default: - llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + bool Swap, Negate; + unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, + PPCSubTarget->hasVSX(), Swap, Negate); + if (Swap) + std::swap(LHS, RHS); + + if (Negate) { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : + PPC::VNOR, + VecVT, VCmp, VCmp); } + + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); } if (PPCSubTarget->useCRBits()) @@ -924,6 +915,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return nullptr; // Already selected. } + // In case any misguided DAG-level optimizations form an ADD with a + // TargetConstant operand, crash here instead of miscompiling (by selecting + // an r+r add instead of some kind of r+i add). 
+ if (N->getOpcode() == ISD::ADD && + N->getOperand(1).getOpcode() == ISD::TargetConstant) + llvm_unreachable("Invalid ADD with TargetConstant operand"); + switch (N->getOpcode()) { default: break; @@ -1331,7 +1329,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { else if (N->getValueType(0) == MVT::f32) SelectCCOp = PPC::SELECT_CC_F4; else if (N->getValueType(0) == MVT::f64) - SelectCCOp = PPC::SELECT_CC_F8; + if (PPCSubTarget->hasVSX()) + SelectCCOp = PPC::SELECT_CC_VSFRC; + else + SelectCCOp = PPC::SELECT_CC_F8; + else if (N->getValueType(0) == MVT::v2f64 || + N->getValueType(0) == MVT::v2i64) + SelectCCOp = PPC::SELECT_CC_VSRC; else SelectCCOp = PPC::SELECT_CC_VRRC; @@ -1445,11 +1449,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { - assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI"); + assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && + "Only supported for 64-bit ABI and 32-bit SVR4"); + if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { + SDValue GA = N->getOperand(0); + return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + N->getOperand(1)); + } // For medium and large code model, we generate two instructions as // described below. Otherwise we allow SelectCodeCommon to handle this, - // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 
CodeModel::Model CModel = TM.getCodeModel(); if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; @@ -1466,7 +1476,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large) + if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || + CModel == CodeModel::Large) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); @@ -1483,6 +1494,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } + case PPCISD::PPC32_PICGOT: { + // Generate a PIC-safe GOT reference. + assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && + "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); + return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); + } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. 
@@ -1683,7 +1700,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: - case PPC::SELECT_VRRC: { + case PPC::SELECT_VRRC: + case PPC::SELECT_VSFRC: + case PPC::SELECT_VSRC: { SDValue Op = MachineNode->getOperand(0); if (Op.isMachineOpcode()) { if (Op.getMachineOpcode() == PPC::CRSET) @@ -1989,6 +2008,8 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_F4: case PPC::SELECT_F8: case PPC::SELECT_VRRC: + case PPC::SELECT_VSFRC: + case PPC::SELECT_VSRC: if (Op1Set) ResNode = MachineNode->getOperand(1).getNode(); else if (Op1Unset) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bc057bf..e93bdaf 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -39,6 +39,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +// FIXME: Remove this once soft-float is supported. +static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", +cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); + static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -51,19 +55,10 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; -static TargetLoweringObjectFile *createTLOF(const Triple &TT) { - // If it isn't a Mach-O file then it's going to be a linux ELF - // object file. 
- if (TT.isOSDarwin()) - return new TargetLoweringObjectFileMachO(); - - return new PPC64LinuxTargetObjectFile(); -} - -PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) - : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))), +PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM) + : TargetLowering(TM), Subtarget(*TM.getSubtargetImpl()) { - setPow2DivIsCheap(); + setPow2SDivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); @@ -453,6 +448,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); @@ -526,11 +523,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); @@ -561,11 +553,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Share the Altivec comparison restrictions. 
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); - setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand); - setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand); - setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand); - setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand); - setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); @@ -617,15 +604,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); } - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + if (!isPPC64) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + } setBooleanContents(ZeroOrOneBooleanContent); // Altivec instructions set fields to all zeros or all ones. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + if (!isPPC64) { + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); + } + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); @@ -685,11 +679,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); - if (isPPC64 && Subtarget.isJITCodeModel()) - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. 
- setSupportJumpTables(false); - setInsertFencesForAtomic(true); if (Subtarget.enableMachineScheduler()) @@ -782,6 +771,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; + case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; + case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; @@ -811,10 +802,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; - case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; - case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; @@ -828,6 +817,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { return VT.changeVectorElementTypeToInteger(); } +bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { + assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); + return true; +} + //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// @@ -853,14 +847,27 @@ static bool isConstantOrUndef(int Op, int Val) { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. 
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operantion with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1; - if (!isUnary) { + bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); + if (ShuffleKind == 0) { + if (IsLE) + return false; for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+j)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!IsLE) + return false; + for (unsigned i = 0; i != 16; ++i) + if (!isConstantOrUndef(N->getMaskElt(i), i*2)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = IsLE ? 0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) @@ -871,27 +878,34 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operantion with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - unsigned j, k; - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - j = 0; - k = 1; - } else { - j = 2; - k = 3; - } - if (!isUnary) { + bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian(); + if (ShuffleKind == 0) { + if (IsLE) + return false; for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+k)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!IsLE) + return false; + for (unsigned i = 0; i != 16; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+9), i*2+k)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; @@ -919,38 +933,63 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - if (!isUnary) + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + else + return false; } else { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + else + return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - if (!isUnary) + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + else + return false; } else { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + else + return false; } } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) { +/// The ShuffleKind distinguishes between big-endian operations with two +/// different inputs (0), either-endian operations with two identical inputs +/// (1), and little-endian operations with two different inputs (2). For the +/// latter, the input operands are swapped (see PPCInstrAltivec.td). +int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; @@ -968,38 +1007,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) { unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - - ShiftAmt += i; - - if (!isUnary) { - // Check the rest of the elements to see if they are consecutive. - for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i)) - return -1; - } else { - // Check the rest of the elements to see if they are consecutive. 
- for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15)) - return -1; - } - - } else { // Big Endian + ShiftAmt -= i; + bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()-> + isLittleEndian(); + + if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) + return -1; + } else if (ShuffleKind == 1) { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) + return -1; + } else + return -1; - ShiftAmt -= i; + if (ShuffleKind == 2 && isLE) + ShiftAmt = 16 - ShiftAmt; - if (!isUnary) { - // Check the rest of the elements to see if they are consecutive. - for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) - return -1; - } else { - // Check the rest of the elements to see if they are consecutive. - for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) - return -1; - } - } return ShiftAmt; } @@ -1055,7 +1082,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - if (DAG.getTarget().getDataLayout()->isLittleEndian()) + if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; @@ -1331,7 +1358,13 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. 
- Base = N.getOperand(0); + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } Disp = DAG.getTargetConstant(imm, N.getValueType()); return true; } @@ -1491,10 +1524,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; - // Don't use the pic base if not in PIC relocation model. Or if we are on a - // non-darwin platform. We don't support PIC on other platforms yet. - bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && - TM.getSubtarget<PPCSubtarget>().isDarwin(); + // Don't use the pic base if not in PIC relocation model. + bool isPIC = TM.getRelocationModel() == Reloc::PIC_; + if (isPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; @@ -1550,6 +1582,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), + PPCII::MO_PIC_FLAG); + SDLoc DL(CP); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + } + SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = @@ -1571,6 +1612,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + PPCII::MO_PIC_FLAG); + SDLoc DL(GA); + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + } + SDValue JTIHi = 
DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); @@ -1579,8 +1629,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); + BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); + const BlockAddress *BA = BASDN->getBlockAddress(); - const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual BlockAddress is stored in the TOC. + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); @@ -1589,6 +1647,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } +// Generate a call to __tls_get_addr for the given GOT entry Op. 
+std::pair<SDValue,SDValue> +PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, + SelectionDAG &DAG) const { + + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Op; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, IntPtrTy, + DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), + std::move(Args), 0); + + return LowerCallTo(CLI); +} + SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1601,6 +1680,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); bool is64bit = Subtarget.isPPC64(); + const Module *M = DAG.getMachineFunction().getFunction()->getParent(); + PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); @@ -1632,50 +1713,46 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, } if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, - GOTReg, TGA); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLSGD); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + } else { + if (picLevel == PICLevel::Small) + GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); + else + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTEntryHi, TGA); - - // We need a chain node, and don't have one handy. 
The underlying - // call has no side effects, so using the function entry node - // suffices. - SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); - SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLS_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); - return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + GOTPtr, TGA); + std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); + return CallResult.first; } if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, - GOTReg, TGA); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TLSLD); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + } else { + if (picLevel == PICLevel::Small) + GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); + else + GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); + } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, - GOTEntryHi, TGA); - - // We need a chain node, and don't have one handy. The underlying - // call has no side effects, so using the function entry node - // suffices. 
- SDValue Chain = DAG.getEntryNode(); - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); - SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLSLD_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. - Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + GOTPtr, TGA); + std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); + SDValue TLSAddr = CallResult.first; + SDValue Chain = CallResult.second; SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, ParmReg, TGA); + Chain, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } @@ -1700,6 +1777,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + GSDN->getOffset(), + PPCII::MO_PIC_FLAG); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32)); + } + SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = @@ -1794,7 +1879,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8, - false, false, 0); + false, false, false, 0); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { @@ -1817,7 +1902,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8, - false, false, 0); + false, false, false, 0); InChain = FprIndex.getValue(1); 
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, @@ -2127,14 +2212,19 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); - ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + + // Round up to multiples of the pointer size, except for array members, + // which are always packed. + if (!Flags.isInConsecutiveRegs()) + ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. -static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags, +static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, + ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned Align = PtrByteSize; @@ -2156,14 +2246,78 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags, } } + // Array members are always packed to their original alignment. + if (Flags.isInConsecutiveRegs()) { + // If the array member was split into multiple registers, the first + // needs to be aligned to the size of the full type. (Except for + // ppcf128, which is only aligned as its f64 components.) + if (Flags.isSplit() && OrigVT != MVT::ppcf128) + Align = OrigVT.getStoreSize(); + else + Align = ArgVT.getStoreSize(); + } + return Align; } +/// CalculateStackSlotUsed - Return whether this argument will use its +/// stack slot (instead of being passed in registers). ArgOffset, +/// AvailableFPRs, and AvailableVRs must hold the current argument +/// position, and will be updated to account for this argument. 
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, + ISD::ArgFlagsTy Flags, + unsigned PtrByteSize, + unsigned LinkageSize, + unsigned ParamAreaSize, + unsigned &ArgOffset, + unsigned &AvailableFPRs, + unsigned &AvailableVRs) { + bool UseMemory = false; + + // Respect alignment of argument on the stack. + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; + // If there's no space left in the argument save area, we must + // use memory (this check also catches zero-sized arguments). + if (ArgOffset >= LinkageSize + ParamAreaSize) + UseMemory = true; + + // Allocate argument on the stack. + ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // If we overran the argument save area, we must use memory + // (this check catches arguments passed partially in memory) + if (ArgOffset > LinkageSize + ParamAreaSize) + UseMemory = true; + + // However, if the argument is actually passed in an FPR or a VR, + // we don't use memory after all. + if (!Flags.isByVal()) { + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (AvailableFPRs > 0) { + --AvailableFPRs; + return false; + } + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + if (AvailableVRs > 0) { + --AvailableVRs; + return false; + } + } + + return UseMemory; +} + /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. 
static unsigned EnsureStackAlignment(const TargetMachine &Target, unsigned NumBytes) { - unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment(); + unsigned TargetAlign = + Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); unsigned AlignMask = TargetAlign - 1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; return NumBytes; @@ -2240,11 +2394,11 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); // Reserve space for the linkage area on the stack. - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false); CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); @@ -2315,7 +2469,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // caller's stack frame, right above the parameter list area. SmallVector<CCValAssign, 16> ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -2348,7 +2502,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; - const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + if (DisablePPCFloatInVariadic) + NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs)); @@ -2357,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Make room for NumGPArgRegs and NumFPArgRegs. 
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + - NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, @@ -2399,7 +2555,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } @@ -2437,6 +2593,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // + bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2448,8 +2605,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); - unsigned ArgOffset = LinkageSize; + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, @@ -2471,12 +2628,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); - unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; + // Do a first pass over the arguments to determine whether the ABI + // guarantees that our caller has allocated the parameter save area + // on its stack frame. In the ELFv1 ABI, this is always the case; + // in the ELFv2 ABI, it is true if this is a vararg function or if + // any parameter is located in a stack slot. 
+ + bool HasParameterArea = !isELFv2ABI || isVarArg; + unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; + unsigned NumBytes = LinkageSize; + unsigned AvailableFPRs = Num_FPR_Regs; + unsigned AvailableVRs = Num_VR_Regs; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytes, AvailableFPRs, AvailableVRs)) + HasParameterArea = true; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. + unsigned ArgOffset = LinkageSize; + unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; SmallVector<SDValue, 8> MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -2484,6 +2658,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; + EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; @@ -2492,7 +2667,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( /* Respect alignment of argument on the stack. */ unsigned Align = - CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize); + CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; unsigned CurArgOffset = ArgOffset; @@ -2520,15 +2695,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( continue; } - // All aggregates smaller than 8 bytes must be passed right-justified. - if (ObjSize < PtrByteSize && !isLittleEndian) - CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); - // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + // Create a stack object covering all stack doublewords occupied + // by the argument. 
If the argument is (fully or partially) on + // the stack, or if the argument is fully in registers but the + // caller has allocated the parameter save anyway, we can refer + // directly to the caller's stack frame. Otherwise, create a + // local copy in our own frame. + int FI; + if (HasParameterArea || + ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) + FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true); + else + FI = MFI->CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(FIN); - if (ObjSize < 8) { + // Handle aggregates smaller than 8 bytes. + if (ObjSize < PtrByteSize) { + // The value of the object is its address, which differs from the + // address of the enclosing doubleword on big-endian systems. + SDValue Arg = FIN; + if (!isLittleEndian) { + SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT); + Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); + } + InVals.push_back(Arg); + if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); @@ -2537,18 +2728,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); - Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(FuncArg), ObjType, false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area - // slot. The address of the parameter was already calculated - // above (InVals.push_back(FIN)) to be the right-justified - // offset within the slot. For this store, we need a new - // frame index that points at the beginning of the slot. 
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(FuncArg), false, false, 0); @@ -2562,27 +2748,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( continue; } + // The value of the object is its address, which is the address of + // its first stack doubleword. + InVals.push_back(FIN); + + // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { - // Store whatever pieces of the object are in registers - // to memory. ArgOffset will be the address of the beginning - // of the object. - if (GPR_idx != Num_GPR_Regs) { - unsigned VReg; - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, j), - false, false, 0); - MemOps.push_back(Store); - ++GPR_idx; - ArgOffset += PtrByteSize; - } else { - ArgOffset += ArgSize - j; + if (GPR_idx == Num_GPR_Regs) break; + + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Addr = FIN; + if (j) { + SDValue Off = DAG.getConstant(j, PtrVT); + Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(FuncArg, j), + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; } + ArgOffset += ArgSize; continue; } @@ -2591,6 +2779,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::i1: case MVT::i32: case MVT::i64: + // These can be scalar arguments or elements of an integer array type + // passed directly. 
Clang may use those instead of "byval" aggregate + // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); @@ -2608,6 +2799,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::f32: case MVT::f64: + // These can be scalar arguments or elements of a float array type + // passed directly. The latter are used to implement ELFv2 homogenous + // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; @@ -2620,12 +2814,32 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; + } else if (GPR_idx != Num_GPR_Regs) { + // This can only ever happen in the presence of f32 array types, + // since otherwise we never run out of FPRs before running out + // of GPRs. + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::f32) { + if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) + ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, + DAG.getConstant(32, MVT::i32)); + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + } + + ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { needsLoad = true; - ArgSize = PtrByteSize; } - ArgOffset += 8; + // When passing an array of floats, the array occupies consecutive + // space in the argument area; only round up to the next doubleword + // at the end of the array. Otherwise, each float takes 8 bytes. + ArgSize = Flags.isInConsecutiveRegs() ? 
ObjSize : PtrByteSize; + ArgOffset += ArgSize; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; break; case MVT::v4f32: case MVT::v4i32: @@ -2633,6 +2847,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + // These can be scalar arguments or elements of a vector array type + // passed directly. The latter are used to implement ELFv2 homogenous + // vector aggregates. if (VR_idx != Num_VR_Regs) { unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : @@ -2662,7 +2879,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; - MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); + if (HasParameterArea) + MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); + else + MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that @@ -2723,7 +2943,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, + false); unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -2849,7 +3070,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. 
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -3336,6 +3557,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isELFv2ABI = Subtarget.isELFv2ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain @@ -3352,42 +3574,41 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, } if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 - // Use indirect calls for ALL functions calls in JIT mode, since the - // far-call stubs may be outside relocation limits for a BL instruction. - if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { - unsigned OpFlags = 0; - if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. - OpFlags = PPCII::MO_DARWIN_STUB; - } - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. 
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), - 0, OpFlags); - needIndirectCall = false; + unsigned OpFlags = 0; + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + (G->getGlobal()->isDeclaration() || + G->getGlobal()->isWeakForLinker())) || + (Subtarget.isTargetELF() && !isPPC64 && + !G->getGlobal()->hasLocalLinkage() && + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_PLT_OR_STUB; } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType(), 0, OpFlags); + needIndirectCall = false; } if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; - if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || + (Subtarget.isTargetELF() && !isPPC64 && + DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. 
- OpFlags = PPCII::MO_DARWIN_STUB; + OpFlags = PPCII::MO_PLT_OR_STUB; } Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), @@ -3400,7 +3621,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64) { + if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -3480,7 +3701,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64) + if (isSVR4ABI && isPPC64 && !isELFv2ABI) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -3491,6 +3712,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); + + // If this is a call to __tls_get_addr, find the symbol whose address + // is to be taken and add it to the list. This will be used to + // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). + // We find the symbol by walking the chain to the CopyFromReg, walking + // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and + // pulling the symbol from that node. 
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + if (!strcmp(S->getSymbol(), "__tls_get_addr")) { + assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); + SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); + SDValue TGTAddr = AddI->getOperand(1); + assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && + "Didn't find target global TLS address where we expected one"); + Ops.push_back(TGTAddr); + CallOpc = PPCISD::CALL_TLS; + } } // If this is a tail call add stack pointer delta. if (isTailCall) @@ -3502,6 +3740,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (Callee.getNode() && isELFv2ABI) + Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); + return CallOpc; } @@ -3522,8 +3764,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. @@ -3571,6 +3813,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals) const { + + bool isELFv2ABI = Subtarget.isELFv2ABI(); std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, @@ -3589,7 +3833,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, getTargetMachine().Options.GuaranteedTailCallOpt) ? 
NumBytes : 0; // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -3636,7 +3881,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; - } + } else if (CallOpc == PPCISD::CALL_TLS) + // For 64-bit SVR4, TLS calls are always non-local. + CallOpc = PPCISD::CALL_NOP_TLS; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); @@ -3646,7 +3893,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); - unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); @@ -3735,11 +3982,12 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); // Reserve space for the linkage area on the stack. 
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false), + PtrByteSize); if (isVarArg) { // Handle fixed and variable vector arguments differently. @@ -3776,7 +4024,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing aggregate by value arguments. SmallVector<CCValAssign, 16> ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -3948,6 +4196,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); @@ -3966,21 +4215,27 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage - // area, and parameter passing area. We start with at least 48 bytes, which - // is reserved space for [SP][CR][LR][3 x unused]. - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); + // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes + // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage + // area is 32 bytes reserved space for [SP][CR][LR][TOC]. + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); unsigned NumBytes = LinkageSize; // Add up all the space actually used. for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; + EVT OrigVT = Outs[i].ArgVT; /* Respect alignment of argument on the stack. 
*/ - unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize); + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); NumBytes = ((NumBytes + Align - 1) / Align) * Align; NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + if (Flags.isInConsecutiveRegsLast()) + NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } unsigned NumBytesActuallyUsed = NumBytes; @@ -3990,6 +4245,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); // Tail call needs the stack to be aligned. @@ -4056,10 +4312,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + EVT OrigVT = Outs[i].ArgVT; /* Respect alignment of argument on the stack. */ unsigned Align = - CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize); + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; /* Compute GPR index associated with argument offset. 
*/ @@ -4103,7 +4361,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT, - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); @@ -4199,6 +4457,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::i1: case MVT::i32: case MVT::i64: + // These can be scalar arguments or elements of an integer array type + // passed directly. Clang may use those instead of "byval" aggregate + // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != NumGPRs) { RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); } else { @@ -4209,39 +4470,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; break; case MVT::f32: - case MVT::f64: - if (FPR_idx != NumFPRs) { + case MVT::f64: { + // These can be scalar arguments or elements of a float array type + // passed directly. The latter are used to implement ELFv2 homogenous + // float aggregates. + + // Named arguments go into FPRs first, and once they overflow, the + // remaining arguments go into GPRs and then the parameter save area. + // Unnamed arguments for vararg functions always go to GPRs and + // then the parameter save area. For now, put all arguments to vararg + // routines always in both locations (FPR *and* GPR or stack slot). + bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; + + // First load the argument into the next available FPR. + if (FPR_idx != NumFPRs) RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); - if (isVarArg) { - // A single float or an aggregate containing only a single float - // must be passed right-justified in the stack doubleword, and - // in the GPR, if one is available. 
- SDValue StoreOff; - if (Arg.getSimpleValueType().SimpleTy == MVT::f32 && - !isLittleEndian) { - SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); - StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - } else - StoreOff = PtrOff; - - SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, - MachinePointerInfo(), false, false, 0); - MemOpChains.push_back(Store); - - // Float varargs are always shadowed in available integer registers - if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), false, false, - false, 0); - MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); - } - } + // Next, load the argument into GPR or stack slot if needed. + if (!NeedGPROrStack) + ; + else if (GPR_idx != NumGPRs) { + // In the non-vararg case, this can only ever happen in the + // presence of f32 array types, since otherwise we never run + // out of FPRs before running out of GPRs. + SDValue ArgVal; + + // Double values are always passed in a single GPR. + if (Arg.getValueType() != MVT::f32) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); + + // Non-array float values are extended and passed in a GPR. + } else if (!Flags.isInConsecutiveRegs()) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); + + // If we have an array of floats, we collect every odd element + // together with its predecessor into one GPR. + } else if (ArgOffset % PtrByteSize != 0) { + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); + Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + if (!isLittleEndian) + std::swap(Lo, Hi); + ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); + + // The final element, if even, goes into the first half of a GPR. 
+ } else if (Flags.isInConsecutiveRegsLast()) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); + if (!isLittleEndian) + ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, + DAG.getConstant(32, MVT::i32)); + + // Non-final even elements are skipped; they will be handled + // together the with subsequent argument on the next go-around. + } else + ArgVal = SDValue(); + + if (ArgVal.getNode()) + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); } else { // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. - if (Arg.getValueType() == MVT::f32 && !isLittleEndian) { + if (Arg.getValueType() == MVT::f32 && + !isLittleEndian && !Flags.isInConsecutiveRegs()) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } @@ -4250,14 +4542,25 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, true, isTailCall, false, MemOpChains, TailCallArguments, dl); } - ArgOffset += 8; + // When passing an array of floats, the array occupies consecutive + // space in the argument area; only round up to the next doubleword + // at the end of the array. Otherwise, each float takes 8 bytes. + ArgOffset += (Arg.getValueType() == MVT::f32 && + Flags.isInConsecutiveRegs()) ? 4 : 8; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; break; + } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + // These can be scalar arguments or elements of a vector array type + // passed directly. The latter are used to implement ELFv2 homogenous + // vector aggregates. + // For a varargs call, named arguments go into VRs or on the stack as // usual; unnamed arguments always go to the stack or the corresponding // GPRs when within range. 
For now, we always put the value in both @@ -4328,11 +4631,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. - unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); + // In the ELFv2 ABI, R12 must contain the address of an indirect callee. + // This does not mean the MTCTR instruction must use R12; it's easier + // to model this as an extra parameter, so do that. + if (isELFv2ABI) + RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -4383,7 +4691,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, + false); unsigned NumBytes = LinkageSize; // Add up all the space actually used. 
@@ -4522,7 +4831,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT, - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -4751,8 +5060,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), - RVLocs, Context); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_PPC); } @@ -4764,8 +5072,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain, SDLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); SDValue Flag; @@ -5773,15 +6081,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || - PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) || - PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) || - PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) { + PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, 1, 
DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { return Op; } } @@ -5789,15 +6097,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. - if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) || - PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) || - PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG)) + unsigned int ShuffleKind = isLittleEndian ? 2 : 0; + if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. 
If so, we can use our @@ -6252,11 +6561,44 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, // Other Lowering Code //===----------------------------------------------------------------------===// +static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *Func = Intrinsic::getDeclaration(M, Id); + return Builder.CreateCall(Func); +} + +// The mappings for emitLeading/TrailingFence is taken from +// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (Ord == SequentiallyConsistent) + return callIntrinsic(Builder, Intrinsic::ppc_sync); + else if (isAtLeastRelease(Ord)) + return callIntrinsic(Builder, Intrinsic::ppc_lwsync); + else + return nullptr; +} + +Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (IsLoad && isAtLeastAcquire(Ord)) + return callIntrinsic(Builder, Intrinsic::ppc_lwsync); + // FIXME: this is too conservative, a dependent branch + isync is enough. + // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and + // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html + // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. + else + return nullptr; +} + MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, bool is64bit, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -6318,7 +6660,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, bool is8bit, // operation unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're @@ -6446,7 +6789,8 @@ llvm::MachineBasicBlock* PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -6545,7 +6889,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); const PPCRegisterInfo *TRI = - static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo()); + getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); @@ -6594,7 +6938,8 @@ MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineFunction *MF = 
MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -6613,7 +6958,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; - unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30; + unsigned BP = (PVT == MVT::i64) ? PPC::X30 : + (Subtarget.isSVR4ABI() && + MF->getTarget().getRelocationModel() == Reloc::PIC_ ? + PPC::R29 : PPC::R30); MachineInstrBuilder MIB; @@ -6703,7 +7051,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return emitEHSjLjLongJmp(MI, BB); } - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. @@ -6726,7 +7075,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, Cond.push_back(MI->getOperand(1)); DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond, MI->getOperand(2).getReg(), MI->getOperand(3).getReg()); @@ -6735,11 +7085,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || MI->getOpcode() == PPC::SELECT_CC_VRRC || + MI->getOpcode() == PPC::SELECT_CC_VSFRC || + MI->getOpcode() == PPC::SELECT_CC_VSRC || MI->getOpcode() == PPC::SELECT_I4 || MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || - MI->getOpcode() == PPC::SELECT_VRRC) { + MI->getOpcode() == PPC::SELECT_VRRC || + MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSRC) { // The incoming 
instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -6770,7 +7124,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || - MI->getOpcode() == PPC::SELECT_VRRC) { + MI->getOpcode() == PPC::SELECT_VRRC || + MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); } else { @@ -7131,151 +7487,54 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// -SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, - DAGCombinerInfo &DCI) const { - if (DCI.isAfterLegalizeVectorOps()) - return SDValue(); - - EVT VT = Op.getValueType(); - - if ((VT == MVT::f32 && Subtarget.hasFRES()) || - (VT == MVT::f64 && Subtarget.hasFRE()) || +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps, + bool &UseOneConstNR) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || + (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX())) { - - // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) - // For the reciprocal, we need to find the zero of the function: - // F(X) = A X - 1 [which has a zero at X = 1/A] - // => - // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form - // does not require additional intermediate precision] - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. The minimum architected relative - // accuracy is 2^-5. 
When hasRecipPrec(), this is 2^-14. IEEE float has - // 23 digits and double has 52 digits. - int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; + // correct after every iteration. For both FRE and FRSQRTE, the minimum + // architected relative accuracy is 2^-5. When hasRecipPrec(), this is + // 2^-14. IEEE float has 23 digits and double has 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) - ++Iterations; - - SelectionDAG &DAG = DCI.DAG; - SDLoc dl(Op); - - SDValue FPOne = - DAG.getConstantFP(1.0, VT.getScalarType()); - if (VT.isVector()) { - assert(VT.getVectorNumElements() == 4 && - "Unknown vector type"); - FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - FPOne, FPOne, FPOne, FPOne); - } - - SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); - DCI.AddToWorklist(Est.getNode()); - - // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); - DCI.AddToWorklist(NewEst.getNode()); - - NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); - DCI.AddToWorklist(NewEst.getNode()); - - NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); - DCI.AddToWorklist(NewEst.getNode()); - - Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); - DCI.AddToWorklist(Est.getNode()); - } - - return Est; + ++RefinementSteps; + UseOneConstNR = true; + return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } - return SDValue(); } -SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, - DAGCombinerInfo &DCI) const { - if (DCI.isAfterLegalizeVectorOps()) - return SDValue(); - - EVT VT = Op.getValueType(); - - if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || - (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 && Subtarget.hasFRES()) || + (VT == 
MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX())) { - - // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) - // For the reciprocal sqrt, we need to find the zero of the function: - // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] - // => - // X_{i+1} = X_i (1.5 - A X_i^2 / 2) - // As a result, we precompute A/2 prior to the iteration loop. - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. The minimum architected relative - // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has - // 23 digits and double has 52 digits. - int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; + // correct after every iteration. For both FRE and FRSQRTE, the minimum + // architected relative accuracy is 2^-5. When hasRecipPrec(), this is + // 2^-14. IEEE float has 23 digits and double has 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) - ++Iterations; - - SelectionDAG &DAG = DCI.DAG; - SDLoc dl(Op); - - SDValue FPThreeHalves = - DAG.getConstantFP(1.5, VT.getScalarType()); - if (VT.isVector()) { - assert(VT.getVectorNumElements() == 4 && - "Unknown vector type"); - FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - FPThreeHalves, FPThreeHalves, - FPThreeHalves, FPThreeHalves); - } - - SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); - DCI.AddToWorklist(Est.getNode()); - - // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that - // this entire sequence requires only one FP constant. 
- SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); - DCI.AddToWorklist(HalfArg.getNode()); - - HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); - DCI.AddToWorklist(HalfArg.getNode()); - - // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) - for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); - DCI.AddToWorklist(NewEst.getNode()); - - NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); - DCI.AddToWorklist(NewEst.getNode()); - - NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); - DCI.AddToWorklist(NewEst.getNode()); - - Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); - DCI.AddToWorklist(Est.getNode()); - } - - return Est; + ++RefinementSteps; + return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } - return SDValue(); } -// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does -// not enforce equality of the chain operands. -static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, +static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { - EVT VT = LS->getMemoryVT(); if (VT.getSizeInBits() / 8 != Bytes) return false; - SDValue Loc = LS->getBasePtr(); SDValue BaseLoc = Base->getBasePtr(); if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) @@ -7306,11 +7565,77 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, return false; } +// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does +// not enforce equality of the chain operands. 
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, + unsigned Bytes, int Dist, + SelectionDAG &DAG) { + if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { + EVT VT = LS->getMemoryVT(); + SDValue Loc = LS->getBasePtr(); + return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); + } + + if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { + EVT VT; + switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + default: return false; + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + case Intrinsic::ppc_vsx_lxvw4x: + VT = MVT::v4i32; + break; + case Intrinsic::ppc_vsx_lxvd2x: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_altivec_lvebx: + VT = MVT::i8; + break; + case Intrinsic::ppc_altivec_lvehx: + VT = MVT::i16; + break; + case Intrinsic::ppc_altivec_lvewx: + VT = MVT::i32; + break; + } + + return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); + } + + if (N->getOpcode() == ISD::INTRINSIC_VOID) { + EVT VT; + switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + default: return false; + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + case Intrinsic::ppc_vsx_stxvw4x: + VT = MVT::v4i32; + break; + case Intrinsic::ppc_vsx_stxvd2x: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_altivec_stvebx: + VT = MVT::i8; + break; + case Intrinsic::ppc_altivec_stvehx: + VT = MVT::i16; + break; + case Intrinsic::ppc_altivec_stvewx: + VT = MVT::i32; + break; + } + + return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); + } + + return false; +} + // Return true is there is a nearyby consecutive load to the one provided // (regardless of alignment). We search up and down the chain, looking though -// token factors and other loads (but nothing else). As a result, a true -// results indicates that it is safe to create a new consecutive load adjacent -// to the load provided. +// token factors and other loads (but nothing else). 
As a result, a true result +// indicates that it is safe to create a new consecutive load adjacent to the +// load provided. static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); @@ -7324,10 +7649,10 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { // nodes just above the top-level loads and token factors. while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); - if (!Visited.insert(ChainNext)) + if (!Visited.insert(ChainNext).second) continue; - if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { + if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; @@ -7355,17 +7680,17 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); - if (!Visited.insert(LoadRoot)) + if (!Visited.insert(LoadRoot).second) continue; - if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) + if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) - if (((isa<LoadSDNode>(*UI) && - cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || + if (((isa<MemSDNode>(*UI) && + cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } @@ -7485,7 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, SDValue BinOp = BinOps.back(); BinOps.pop_back(); - if (!Visited.insert(BinOp.getNode())) + if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); @@ -7699,7 +8024,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, SDValue BinOp = BinOps.back(); BinOps.pop_back(); - if (!Visited.insert(BinOp.getNode())) + if 
(!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); @@ -7936,92 +8261,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); - case ISD::FDIV: { - assert(TM.Options.UnsafeFPMath && - "Reciprocal estimates require UnsafeFPMath"); - - if (N->getOperand(1).getOpcode() == ISD::FSQRT) { - SDValue RV = - DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); - if (RV.getNode()) { - DCI.AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), - N->getOperand(0), RV); - } - } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND && - N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { - SDValue RV = - DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), - DCI); - if (RV.getNode()) { - DCI.AddToWorklist(RV.getNode()); - RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), - N->getValueType(0), RV); - DCI.AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), - N->getOperand(0), RV); - } - } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND && - N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { - SDValue RV = - DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), - DCI); - if (RV.getNode()) { - DCI.AddToWorklist(RV.getNode()); - RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), - N->getValueType(0), RV, - N->getOperand(1).getOperand(1)); - DCI.AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), - N->getOperand(0), RV); - } - } - - SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); - if (RV.getNode()) { - DCI.AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), - N->getOperand(0), RV); - } - - } - break; - case ISD::FSQRT: { - assert(TM.Options.UnsafeFPMath && - "Reciprocal estimates require UnsafeFPMath"); - - // Compute this as 1/(1/sqrt(X)), which is the 
reciprocal of the - // reciprocal sqrt. - SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); - if (RV.getNode()) { - DCI.AddToWorklist(RV.getNode()); - RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode()) { - // Unfortunately, RV is now NaN if the input was exactly 0. Select out - // this case and force the answer to 0. - - EVT VT = RV.getValueType(); - - SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); - if (VT.isVector()) { - assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); - Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); - } - - SDValue ZeroCmp = - DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - DCI.AddToWorklist(ZeroCmp.getNode()); - DCI.AddToWorklist(RV.getNode()); - - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, - ZeroCmp, Zero, RV); - return RV; - } - } - - } - break; case ISD::SINT_TO_FP: if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { @@ -8112,6 +8351,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); if (ISD::isNON_EXTLoad(N) && VT.isVector() && TM.getSubtarget<PPCSubtarget>().hasAltivec() && + // P8 and later hardware should just use LOAD. + !TM.getSubtarget<PPCSubtarget>().hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32) && LD->getAlignment() < ABIAlignment) { @@ -8149,17 +8390,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, Intrinsic::ppc_altivec_lvsl); SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); - // Refine the alignment of the original load (a "new" load created here - // which was identical to the first except for the alignment would be - // merged with the existing node regardless). + // Create the new MMO for the new base load. 
It is like the original MMO, + // but represents an area in memory almost twice the vector size centered + // on the original address. If the address is unaligned, we might start + // reading up to (sizeof(vector)-1) bytes below the address of the + // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(LD->getPointerInfo(), - LD->getMemOperand()->getFlags(), - LD->getMemoryVT().getStoreSize(), - ABIAlignment); - LD->refineAlignment(MMO); - SDValue BaseLoad = SDValue(LD, 0); + MachineMemOperand *BaseMMO = + MF.getMachineMemOperand(LD->getMemOperand(), + -LD->getMemoryVT().getStoreSize()+1, + 2*LD->getMemoryVT().getStoreSize()-1); + + // Create the new base load. + SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx, + getPointerTy()); + SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; + SDValue BaseLoad = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, + DAG.getVTList(MVT::v4i32, MVT::Other), + BaseLoadOps, MVT::v4i32, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the @@ -8181,21 +8430,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + MachineMemOperand *ExtraMMO = + MF.getMachineMemOperand(LD->getMemOperand(), + 1, 2*LD->getMemoryVT().getStoreSize()-1); + SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = - DAG.getLoad(VT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncOffset), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), ABIAlignment); + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, + DAG.getVTList(MVT::v4i32, MVT::Other), + ExtraLoadOps, MVT::v4i32, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); - if 
(BaseLoad.getValueType() != MVT::v4i32) - BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad); - - if (ExtraLoad.getValueType() != MVT::v4i32) - ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); - // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the @@ -8212,36 +8458,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (VT != MVT::v4i32) Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); - // Now we need to be really careful about how we update the users of the - // original load. We cannot just call DCI.CombineTo (or - // DAG.ReplaceAllUsesWith for that matter), because the load still has - // uses created here (the permutation for example) that need to stay. - SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - while (UI != UE) { - SDUse &Use = UI.getUse(); - SDNode *User = *UI; - // Note: BaseLoad is checked here because it might not be N, but a - // bitcast of N. - if (User == Perm.getNode() || User == BaseLoad.getNode() || - User == TF.getNode() || Use.getResNo() > 1) { - ++UI; - continue; - } - - SDValue To = Use.getResNo() ? TF : Perm; - ++UI; - - SmallVector<SDValue, 8> Ops; - for (const SDUse &O : User->ops()) { - if (O == Use) - Ops.push_back(To); - else - Ops.push_back(O); - } - - DAG.UpdateNodeOperands(User, Ops); - } - + // The output of the permutation is our loaded result, the TokenFactor is + // our new chain. + DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } @@ -8659,7 +8878,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // the AsmName field from *RegisterInfo.td, then this would not be necessary. 
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) { - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); @@ -8872,6 +9092,92 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } +bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + + switch (Intrinsic) { + case Intrinsic::ppc_altivec_lvx: + case Intrinsic::ppc_altivec_lvxl: + case Intrinsic::ppc_altivec_lvebx: + case Intrinsic::ppc_altivec_lvehx: + case Intrinsic::ppc_altivec_lvewx: + case Intrinsic::ppc_vsx_lxvd2x: + case Intrinsic::ppc_vsx_lxvw4x: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_altivec_lvebx: + VT = MVT::i8; + break; + case Intrinsic::ppc_altivec_lvehx: + VT = MVT::i16; + break; + case Intrinsic::ppc_altivec_lvewx: + VT = MVT::i32; + break; + case Intrinsic::ppc_vsx_lxvd2x: + VT = MVT::v2f64; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(0); + Info.offset = -VT.getStoreSize()+1; + Info.size = 2*VT.getStoreSize()-1; + Info.align = 1; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::ppc_altivec_stvx: + case Intrinsic::ppc_altivec_stvxl: + case Intrinsic::ppc_altivec_stvebx: + case Intrinsic::ppc_altivec_stvehx: + case Intrinsic::ppc_altivec_stvewx: + case Intrinsic::ppc_vsx_stxvd2x: + case Intrinsic::ppc_vsx_stxvw4x: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_altivec_stvebx: + VT = MVT::i8; + break; + case Intrinsic::ppc_altivec_stvehx: + VT = MVT::i16; + break; + case Intrinsic::ppc_altivec_stvewx: + VT = MVT::i32; + break; + case 
Intrinsic::ppc_vsx_stxvd2x: + VT = MVT::v2f64; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(1); + Info.offset = -VT.getStoreSize()+1; + Info.size = 2*VT.getStoreSize()-1; + Info.align = 1; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + return true; + } + default: + break; + } + + return false; +} + /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination @@ -8931,9 +9237,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } -bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { if (DisablePPCUnaligned) return false; @@ -8948,7 +9255,8 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { - if (VT != MVT::v2f64 && VT != MVT::v2i64) + if (VT != MVT::v2f64 && VT != MVT::v2i64 && + VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index df05aa5..bb4d1f1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H -#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H +#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H #include "PPC.h" #include "PPCInstrInfo.h" @@ -99,6 +99,10 @@ namespace llvm { /// SVR4 calls. 
CALL, CALL_NOP, + /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used + /// to access TLS variables. + CALL_TLS, CALL_NOP_TLS, + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -181,6 +185,10 @@ namespace llvm { /// on PPC32. PPC32_GOT, + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS on PPC32. + PPC32_PICGOT, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. @@ -210,10 +218,6 @@ namespace llvm { /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, - /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). - GET_TLS_ADDR, - /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. @@ -224,10 +228,6 @@ namespace llvm { /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, - /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). - GET_TLSLD_ADDR, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the /// local-dynamic TLS model, produces an ADDIS8 instruction /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed @@ -297,27 +297,28 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. 
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG); + unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG); + unsigned ShuffleKind, SelectionDAG &DAG); - /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift - /// amount, otherwise return -1. - int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the + /// shift amount, otherwise return -1. + int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to @@ -344,7 +345,7 @@ namespace llvm { const PPCSubtarget &Subtarget; public: - explicit PPCTargetLowering(PPCTargetMachine &TM); + explicit PPCTargetLowering(const PPCTargetMachine &TM); /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. @@ -355,6 +356,11 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + /// Return true if target always benefits from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. 
+ bool enableAggressiveFMAFusion(EVT VT) const override; + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -403,6 +409,11 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const override; + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; @@ -472,6 +483,10 @@ namespace llvm { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + bool getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const override; + /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination @@ -490,9 +505,10 @@ namespace llvm { /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - bool allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *Fast = nullptr) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align = 1, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -510,6 +526,20 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override; + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. 
+ bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { + // We support any array type as "consecutive" block in the parameter + // save area. The element type defines the alignment requirement and + // whether the argument should go in GPRs, FPRs, or VRs if available. + // + // Note that clang uses this capability both to implement the ELFv2 + // homogeneous float/vector aggregate ABI, and to avoid having to use + // "byval" when passing aggregates that might fully fit in registers. + return Ty->isArrayTy(); + } + private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; @@ -533,6 +563,8 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; @@ -666,8 +698,12 @@ namespace llvm { SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; - SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; + + SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps, + bool &UseOneConstNR) const override; + SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 9318f70..9a19abb 100644 --- 
a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)), def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; +def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), + (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; -// The following three definitions are selected for small code model only. +// The following four definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), @@ -801,8 +804,12 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocCPT", [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; +def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), + "#LDtocCPT", + [(set i64:$rD, + (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in +let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src), "ld 2, $src", IIC_LdStLD, [(PPCload_toc ixaddr:$src)]>, isPPC64; @@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - "#GETtlsADDR", - [(set i64:$rD, - (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, 
s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), - "#GETtlsldADDR", - [(set i64:$rD, - (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, - isPPC64; def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, @@ -1135,3 +1132,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)), def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), (STDX $rS, xoaddr:$dst)>; +// 64-bit atomic loads and stores +def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>; +def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>; + +def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>; +def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index dce46d8..4ef08eb 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -22,110 +22,143 @@ def vnot_ppc : PatFrag<(ops node:$in), def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, - *CurDAG); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, - *CurDAG); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, - *CurDAG); + return 
PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, - *CurDAG); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. +def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; +def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); }]>; def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); }]>; def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); }]>; def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); }]>; def vmrghh_shuffle : PatFrag<(ops node:$lhs, 
node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); }]>; def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); }]>; def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); }]>; def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); }]>; def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true, - *CurDAG); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); }]>; def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); }]>; def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); }]>; def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return 
PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true, - *CurDAG); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); +}]>; + + +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. +def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); +}]>; +def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); }]>; def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG)); }]>; def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1; + return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1; }], VSLDOI_get_imm>; /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag 
combiner. def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG)); }]>; def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1; + return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1; }], VSLDOI_unary_get_imm>; +/// VSLDOI_swapped* - These fragments are provided for little-endian, where +/// the inputs must be swapped for correct semantics. +def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG)); +}]>; +def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1; +}], VSLDOI_get_imm>; + + // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG)); @@ -242,48 +275,64 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">; let Predicates = [HasAltivec] in { -let isCodeGenOnly = 1 in { -def DSS : DSS_Form<822, (outs), - (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; -def DSSALL : DSS_Form<822, (outs), - (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; -def DST : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; -def DSTT : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; -def DSTST : 
DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; -def DSTSTT : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, - Deprecated<DeprecatedDST>; +def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM), + "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>, + Deprecated<DeprecatedDST> { + let A = 0; + let B = 0; +} -def DST64 : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, +def DSSALL : DSS_Form<1, 822, (outs), (ins), + "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>, + Deprecated<DeprecatedDST> { + let STRM = 0; + let A = 0; + let B = 0; +} + +def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>, Deprecated<DeprecatedDST>; -def DSTT64 : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, + +def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>, Deprecated<DeprecatedDST>; -def DSTST64 : DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, + +def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>, Deprecated<DeprecatedDST>; -def DSTSTT64 : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, + +def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, 
gprc:$rB), + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>, Deprecated<DeprecatedDST>; + +let isCodeGenOnly = 1 in { + // The very same instructions as above, but formally matching 64bit registers. + def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstst i64:$rA, i32:$rB, + imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dststt i64:$rA, i32:$rB, + imm:$STRM)]>, + Deprecated<DeprecatedDST>; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), @@ -731,30 +780,6 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), // Additional Altivec Patterns // -// DS* intrinsics -def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; -def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; - -// * 32-bit -def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM), - (DST 0, imm:$STRM, $rA, $rB)>; -def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM), - (DSTT 1, imm:$STRM, $rA, $rB)>; -def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM), - (DSTST 0, imm:$STRM, $rA, $rB)>; -def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM), - (DSTSTT 1, imm:$STRM, $rA, $rB)>; - -// * 64-bit -def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM), - (DST64 0, imm:$STRM, $rA, $rB)>; -def : 
Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM), - (DSTT64 1, imm:$STRM, $rA, $rB)>; -def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM), - (DSTST64 0, imm:$STRM, $rA, $rB)>; -def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM), - (DSTSTT64 1, imm:$STRM, $rA, $rB)>; - // Loads. def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; @@ -789,6 +814,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), (VPKUHUM $vA, $vA)>; +// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands. +// These fragments are matched for little-endian, where the inputs must +// be swapped for correct semantics. +def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB), + (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>; +def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUWUM $vB, $vA)>; +def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUHUM $vB, $vA)>; + // Match vmrg*(x,x) def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), (VMRGLB $vA, $vA)>; @@ -803,6 +838,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), (VMRGHW $vA, $vA)>; +// Match vmrg*(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be +// swapped for correct semantics. 
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLB $vB, $vA)>; +def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLH $vB, $vA)>; +def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLW $vB, $vA)>; +def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHB $vB, $vA)>; +def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHH $vB, $vA)>; +def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHW $vB, $vA)>; + // Logical Operations def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>; diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h index b424d11..cf71b1c 100644 --- a/lib/Target/PowerPC/PPCInstrBuilder.h +++ b/lib/Target/PowerPC/PPCInstrBuilder.h @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC_INSTRBUILDER_H -#define POWERPC_INSTRBUILDER_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H +#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 1e4396c..aa68497 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -380,6 +380,11 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms let Inst{31} = RC; } +class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> { + let RST = 0; +} + // This is the same as XForm_base_r3xo, but the first two operands are swapped // when code is emitted. 
class XForm_base_r3xo_swapped @@ -417,6 +422,22 @@ class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let B = 0; } +class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<5> A; + bits<1> WS; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{20} = WS; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { @@ -457,6 +478,52 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<4> CT; + bits<5> RA; + bits<5> RB; + + let Inst{6} = 0; + let Inst{7-10} = CT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<4> SR; + + let Inst{6-10} = RS; + let Inst{12-15} = SR; + let Inst{21-30} = xo; +} + +class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> MO; + + let Inst{6-10} = MO; + let Inst{21-30} = xo; +} + +class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<5> RB; + + let Inst{6-10} = RS; + let Inst{16-20} = RB; + let Inst{21-30} = xo; +} + class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { @@ -764,10 +831,9 @@ class 
DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, // DSS_Form - Form X instruction, used for altivec dss* instructions. -class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr, +class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<31, OOL, IOL, asmstr, itin> { - bits<1> T; bits<2> STRM; bits<5> A; bits<5> B; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 9bac91d..daf8790 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -75,7 +75,7 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = - &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); + static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG); } @@ -331,6 +331,11 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opcode)); } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(PPC::NOP); +} + // Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. @@ -1617,6 +1622,7 @@ protected: bool Changed = false; MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); I != IE; ++I) { MachineInstr *MI = I; @@ -1682,16 +1688,26 @@ protected: // In theory, there could be other uses of the addend copy before this // fma. We could deal with this, but that would require additional // logic below and I suspect it will not occur in any relevant - // situations. 
- bool OtherUsers = false; + // situations. Additionally, check whether the copy source is killed + // prior to the fma. In order to replace the addend here with the + // source of the copy, it must still be live here. We can't use + // interval testing for a physical register, so as long as we're + // walking the MIs we may as well test liveness here. + bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); - J != JE; --J) + J != JE; --J) { if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) { OtherUsers = true; break; } + if (J->modifiesRegister(AddendSrcReg, TRI) || + J->killsRegister(AddendSrcReg, TRI)) { + KillsAddendSrc = true; + break; + } + } - if (OtherUsers) + if (OtherUsers || KillsAddendSrc) continue; // Find one of the product operands that is killed by this instruction. @@ -1712,10 +1728,11 @@ protected: if (!KilledProdOp) continue; - // In order to replace the addend here with the source of the copy, - // it must still be live here. - if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx)) - continue; + // For virtual registers, verify that the addend source register + // is live here (as should have been assured above). + assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || + LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && + "Addend source register is not live!"); // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. @@ -1737,6 +1754,12 @@ protected: unsigned OldFMAReg = MI->getOperand(0).getReg(); + // The transformation doesn't work well with things like: + // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; + // so leave such things alone. 
+ if (OldFMAReg == KilledProdReg) + continue; + assert(OldFMAReg == AddendMI->getOperand(0).getReg() && "Addend copy not tied to old FMA output!"); @@ -1827,7 +1850,7 @@ public: LIS = &getAnalysis<LiveIntervals>(); - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); bool Changed = false; @@ -1980,7 +2003,7 @@ public: // If we don't have VSX on the subtarget, don't do anything. if (!TM->getSubtargetImpl()->hasVSX()) return false; - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); bool Changed = false; @@ -2057,7 +2080,7 @@ public: // If we don't have VSX don't bother doing anything here. if (!TM->getSubtargetImpl()->hasVSX()) return false; - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); bool Changed = false; @@ -2214,7 +2237,7 @@ protected: public: bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); bool Changed = false; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 83f14c6..4d310fe 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC_INSTRUCTIONINFO_H -#define POWERPC_INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H +#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H #include "PPC.h" #include "PPCRegisterInfo.h" @@ -228,6 +228,8 @@ public: /// instruction may be. This returns the maximum number of bytes. 
/// unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + + void getNoopForMachoTarget(MCInst &NopInst) const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index c2e3382..8c76c46 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; +def tocentry32 : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. @@ -107,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; -def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; -def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, [SDNPHasChain]>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; @@ -133,9 +134,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc 
: SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, @@ -417,6 +424,15 @@ def u2imm : Operand<i32> { let PrintMethod = "printU2ImmOperand"; let ParserMatchClass = PPCU2ImmAsmOperand; } + +def PPCU4ImmAsmOperand : AsmOperandClass { + let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; + let RenderMethod = "addImmOperands"; +} +def u4imm : Operand<i32> { + let PrintMethod = "printU4ImmOperand"; + let ParserMatchClass = PPCU4ImmAsmOperand; +} def PPCS5ImmAsmOperand : AsmOperandClass { let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; let RenderMethod = "addImmOperands"; @@ -446,7 +462,7 @@ def u6imm : Operand<i32> { } def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addS16ImmOperands"; } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; @@ -456,7 +472,7 @@ def s16imm : Operand<i32> { } def PPCU16ImmAsmOperand : AsmOperandClass { let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addU16ImmOperands"; } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; @@ -466,7 +482,7 @@ def u16imm : Operand<i32> { } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addS16ImmOperands"; } def s17imm : Operand<i32> { // This operand type is used for addis/lis to allow the assembler parser @@ -542,7 +558,7 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> { def PPCDispRIOperand : AsmOperandClass { let Name = "DispRI"; let PredicateMethod = "isS16Imm"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addS16ImmOperands"; } def dispRI : Operand<iPTR> { let ParserMatchClass = PPCDispRIOperand; @@ -554,6 +570,27 @@ def PPCDispRIXOperand : AsmOperandClass { def dispRIX : Operand<iPTR> { let ParserMatchClass = PPCDispRIXOperand; } +def PPCDispSPE8Operand : 
AsmOperandClass { + let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8"; + let RenderMethod = "addImmOperands"; +} +def dispSPE8 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE8Operand; +} +def PPCDispSPE4Operand : AsmOperandClass { + let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4"; + let RenderMethod = "addImmOperands"; +} +def dispSPE4 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE4Operand; +} +def PPCDispSPE2Operand : AsmOperandClass { + let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2"; + let RenderMethod = "addImmOperands"; +} +def dispSPE2 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE2Operand; +} def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; @@ -571,6 +608,21 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. let EncoderMethod = "getMemRIXEncoding"; let DecoderMethod = "decodeMemRIXOperands"; } +def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE8DisEncoding"; +} +def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE4DisEncoding"; +} +def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE2DisEncoding"; +} // A single-register address. This is used with the SjLj // pseudo-instructions. 
@@ -585,6 +637,12 @@ def tlsreg32 : Operand<i32> { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; } +def tlsgd32 : Operand<i32> {} +def tlscall32 : Operand<i32> { + let PrintMethod = "printTLSCall"; + let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym); + let EncoderMethod = "getTLSCallEncoding"; +} // PowerPC Predicate operand. def pred : Operand<OtherVT> { @@ -611,6 +669,12 @@ def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; def IsBookE : Predicate<"PPCSubTarget->isBookE()">; def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; +def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">; +def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">; +def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">; +def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; +def IsE500 : Predicate<"PPCSubTarget->isE500()">; +def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
@@ -967,6 +1031,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let Defs = [LR] in def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; +let Defs = [LR] in + def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>, + PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { @@ -1068,6 +1135,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { + def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func), + "bl $func", IIC_BrB, []>; def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), @@ -1243,8 +1312,15 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; +def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), + "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>; + def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), - (DCBT xoaddr:$dst)>; + (DCBT xoaddr:$dst)>; // data prefetch for loads +def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)), + (DCBTST xoaddr:$dst)>; // data prefetch for stores +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)), + (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read) // Atomic operations let usesCustomInserter = 1 in { @@ -1628,17 +1704,19 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", IIC_LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), - "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>; + "sync $L", IIC_LdStSync, []>; let isCodeGenOnly = 1 in { def MSYNC : XForm_24_sync<31, 598, (outs), (ins), - "msync", IIC_LdStSync, []>, Requires<[IsBookE]> { + "msync", IIC_LdStSync, []> { let L = 0; } } -def : Pat<(int_ppc_sync), (SYNC 0)>, 
Requires<[IsNotBookE]>; -def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>; +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>; +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; +def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; //===----------------------------------------------------------------------===// // PPC32 Arithmetic Instructions. @@ -2355,6 +2433,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)), def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; +def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), + (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -2393,13 +2473,47 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", [(set i32:$rD, (PPCppc32GOT))]>; +// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. +// This uses two output registers, the first as the real output, the second as a +// temporary register, used internally in code generation. 
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", + []>, NoEncode<"$rT">; + def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), - "#LDgotTprelL32", - [(set i32:$rD, - (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; + "#LDgotTprelL32", + [(set i32:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), (ADD4TLS $in, tglobaltlsaddr:$g)>; +def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDItlsgdL32", + [(set i32:$rD, + (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; +def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDItlsldL32", + [(set i32:$rD, + (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDIdtprelL32", + [(set i32:$rD, + (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>; +def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), + "#ADDISdtprelHA32", + [(set i32:$rD, + (PPCaddisDtprelHA i32:$reg, + tglobaltlsaddr:$disp))]>; + +// Support for Position-independent code +def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), + "#LWZtoc", + [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +// Get Global (GOT) Base Register offset, from the word immediately preceding +// the function label. +def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; + + // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. 
@@ -2434,8 +2548,15 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>; -def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>; +// Only seq_cst fences require the heavyweight sync (SYNC 0). +// All others can use the lightweight sync (SYNC 1). +// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits +// versions of Power. +def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), @@ -2454,6 +2575,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA), (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; include "PPCInstrAltivec.td" +include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" @@ -2970,6 +3092,16 @@ def : Pat<(i1 (not (trunc i64:$in))), // PowerPC Instructions used for assembler/disassembler only // +// FIXME: For B=0 or B > 8, the registers following RT are used. +// WARNING: Do not add patterns for this instruction without fixing this. +def LSWI : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B), + "lswi $RT, $A, $B", IIC_LdStLoad, []>; + +// FIXME: For B=0 or B > 8, the registers following RT are used. +// WARNING: Do not add patterns for this instruction without fixing this. 
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B), + "stswi $RT, $A, $B", IIC_LdStLoad, []>; + def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), "isync", IIC_SprISYNC, []>; @@ -2982,9 +3114,47 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), "wait $L", IIC_LdStLoad, []>; +def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO), + "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>; + +def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR), + "mtsr $SR, $RS", IIC_SprMTSR>; + +def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR), + "mfsr $RS, $SR", IIC_SprMFSR>; + +def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB), + "mtsrin $RS, $RB", IIC_SprMTSR>; + +def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB), + "mfsrin $RS, $RB", IIC_SprMFSR>; + def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), "mtmsr $RS, $L", IIC_SprMTMSR>; +def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS), + "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> { + let L = 0; +} + +def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>, + Requires<[IsBookE]> { + bits<1> E; + + let Inst{16} = E; + let Inst{21-30} = 163; +} + +def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B), + "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; +def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B), + "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; + +def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; + def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), "mfmsr $RT", IIC_SprMFMSR, []>; @@ -3002,15 +3172,66 @@ def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", 
IIC_SprSLBIA, []>; +def TLBIA : XForm_0<31, 370, (outs), (ins), + "tlbia", IIC_SprTLBIA, []>; + def TLBSYNC : XForm_0<31, 566, (outs), (ins), "tlbsync", IIC_SprTLBSYNC, []>; def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), "tlbiel $RB", IIC_SprTLBIEL, []>; +def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB), + "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; +def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB), + "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; + def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), "tlbie $RB,$RS", IIC_SprTLBIE, []>; +def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B", + IIC_LdStLoad>, Requires<[IsBookE]>; + +def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B", + IIC_LdStLoad>, Requires<[IsBookE]>; + +def TLBRE : XForm_24_eieio<31, 946, (outs), (ins), + "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>; + +def TLBWE : XForm_24_eieio<31, 978, (outs), (ins), + "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>; + +def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS), + "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; + +def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS), + "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; + +def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), + "tlbsx $RST, $A, $B", IIC_LdStLoad, []>, + Requires<[IsPPC4xx]>; +def TLBSX2D : XForm_base_r3xo<31, 914, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "tlbsx. 
$RST, $A, $B", IIC_LdStLoad, []>, + Requires<[IsPPC4xx]>, isDOT; + +def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>; + +def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>, + Requires<[IsBookE]>; +def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>, + Requires<[IsBookE]>; + +def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>, + Requires<[IsE500]>; +def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>, + Requires<[IsE500]>; + +def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR), + "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>; +def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR), + "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // @@ -3033,15 +3254,17 @@ class PPCAsmPseudo<string asm, dag iops> def : InstAlias<"sc", (SC 0)>; -def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>; -def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>; -def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>; -def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>; +def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>; +def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>; +def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>; +def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>; def : InstAlias<"wait", (WAIT 0)>; def : InstAlias<"waitrsv", (WAIT 1)>; def : InstAlias<"waitimpl", (WAIT 2)>; +def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>; + def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; @@ -3050,9 +3273,57 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)> def : InstAlias<"mtxer $Rx", (MTSPR 
1, gprc:$Rx)>; def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; +def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>; +def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>; + +def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>; +def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>; + +def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>; +def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>; + +def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>; +def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>; + +def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>; +def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>; + +def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>; +def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>; + +def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>; +def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>; + +def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>; +def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>; + +def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>; +def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>; + +def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>; +def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>; + +def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>; +def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>; + def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>; +def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>; def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>; +def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>; +def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>; + +def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mttblo $Rx", 
(MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>; + def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; @@ -3063,6 +3334,60 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>; +foreach BATR = 0-3 in { + def : InstAlias<"mtdbatu "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfdbatu $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtdbatl "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfdbatl $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtibatu "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfibatu $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtibatl "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfibatl $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>, + Requires<[IsPPC6xx]>; +} + +foreach BR = 0-7 in { + def : InstAlias<"mfbr"#BR#" $Rx", + (MFDCR gprc:$Rx, !add(BR, 0x80))>, + Requires<[IsPPC4xx]>; + def : InstAlias<"mtbr"#BR#" $Rx", + (MTDCR gprc:$Rx, !add(BR, 0x80))>, + Requires<[IsPPC4xx]>; +} + +def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtdear $Rx", (MTSPR 981, 
gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>; +def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>; + +def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>; + def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", @@ -3082,25 +3407,25 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; -def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>; -def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>; -def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>; -def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>; - -def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>; -def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>; -def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>; -def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>; +def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>; +def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>; -def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>; -def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>; -def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>; -def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>; - -def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>; -def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>; -def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>; -def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>; +foreach SPRG = 0-3 in { + def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>; + def : 
InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>; + def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; + def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; +} +foreach SPRG = 4-7 in { + def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>, + Requires<[IsBookE]>; + def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>, + Requires<[IsBookE]>; + def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, + Requires<[IsBookE]>; + def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, + Requires<[IsBookE]>; +} def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>; @@ -3119,6 +3444,15 @@ def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>; def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; +def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>, + Requires<[IsPPC4xx]>; + def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTLWIo : PPCAsmPseudo<"extlwi. 
$rA, $rS, $n, $b", @@ -3367,3 +3701,18 @@ defm : TrapExtendedMnemonic<"lnl", 5>; defm : TrapExtendedMnemonic<"lng", 6>; defm : TrapExtendedMnemonic<"u", 31>; +// Atomic loads +def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>; +def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>; +def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>; +def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>; +def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>; +def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>; + +// Atomic stores +def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>; +def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>; +def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>; diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td new file mode 100644 index 0000000..cc3a4d2 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrSPE.td @@ -0,0 +1,447 @@ +//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Signal Processing Engine extension to +// the PowerPC instruction set. 
+// +//===----------------------------------------------------------------------===// + +class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = []; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> { + let RB = 0; +} + +class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + + let Pattern = []; + + let Inst{6-8} = crD; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<21> D; + + let Pattern = []; + + let Inst{6-10} = RT; + let Inst{20} = D{0}; + let Inst{19} = D{1}; + let Inst{18} = D{2}; + let Inst{17} = D{3}; + let Inst{16} = D{4}; + let Inst{15} = D{5}; + let Inst{14} = D{6}; + let Inst{13} = D{7}; + let Inst{12} = D{8}; + let Inst{11} = D{9}; + let Inst{11-20} = D{0-9}; + let Inst{21-31} = xo; +} + +let Predicates = [HasSPE], isAsmParserOnly = 1 in { + +def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst), + "evldd $RT, $dst", IIC_VecFP>; +def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst), + "evldw $RT, $dst", IIC_VecFP>; +def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst), + "evldh $RT, $dst", IIC_VecFP>; +def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst), + "evlhhesplat $RT, $dst", IIC_VecFP>; +def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst), + "evlhhousplat $RT, $dst", IIC_VecFP>; +def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst), + "evlhhossplat $RT, $dst", IIC_VecFP>; +def 
EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst), + "evlwhe $RT, $dst", IIC_VecFP>; +def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst), + "evlwhou $RT, $dst", IIC_VecFP>; +def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst), + "evlwhos $RT, $dst", IIC_VecFP>; +def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst), + "evlwwsplat $RT, $dst", IIC_VecFP>; +def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst), + "evlwhsplat $RT, $dst", IIC_VecFP>; + +def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst), + "evstdd $RT, $dst", IIC_VecFP>; +def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst), + "evstdh $RT, $dst", IIC_VecFP>; +def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst), + "evstdw $RT, $dst", IIC_VecFP>; +def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst), + "evstwhe $RT, $dst", IIC_VecFP>; +def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst), + "evstwho $RT, $dst", IIC_VecFP>; +def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst), + "evstwwe $RT, $dst", IIC_VecFP>; +def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst), + "evstwwo $RT, $dst", IIC_VecFP>; + +def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA), + "evmra $RT, $RA", IIC_VecFP> { + let RB = 0; +} + +def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "brinc $RT, $RA, $RB", IIC_VecFP>; +def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA), + "evabs $RT, $RA", IIC_VecFP>; + +def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), + "evaddiw $RT, $RB, $RA", IIC_VecFP>; +def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA), + "evaddsmiaaw $RT, $RA", IIC_VecFP>; +def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA), + "evaddssiaaw $RT, $RA", IIC_VecFP>; +def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA), + "evaddusiaaw $RT, $RA", IIC_VecFP>; +def EVADDUMIAAW 
: EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA), + "evaddumiaaw $RT, $RA", IIC_VecFP>; +def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evaddw $RT, $RA, $RB", IIC_VecFP>; + +def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evand $RT, $RA, $RB", IIC_VecFP>; +def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evandc $RT, $RA, $RB", IIC_VecFP>; + +def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), + "evcmpeq $crD, $RA, $RB", IIC_VecFP>; +def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), + "evcmpgts $crD, $RA, $RB", IIC_VecFP>; +def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), + "evcmpgtu $crD, $RA, $RB", IIC_VecFP>; +def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), + "evcmplts $crD, $RA, $RB", IIC_VecFP>; +def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB), + "evcmpltu $crD, $RA, $RB", IIC_VecFP>; + +def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA), + "evcntlsw $RT, $RA", IIC_VecFP>; +def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA), + "evcntlzw $RT, $RA", IIC_VecFP>; + +def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evdivws $RT, $RA, $RB", IIC_VecFP>; +def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evdivwu $RT, $RA, $RB", IIC_VecFP>; + +def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "eveqv $RT, $RA, $RB", IIC_VecFP>; + +def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA), + "evextsb $RT, $RA", IIC_VecFP>; +def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA), + "evextsh $RT, $RA", IIC_VecFP>; + +def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlddx $RT, $RA, $RB", IIC_VecFP>; +def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evldwx $RT, $RA, $RB", IIC_VecFP>; +def EVLDHX : EVXForm_1<772, (outs 
gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evldhx $RT, $RA, $RB", IIC_VecFP>; +def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>; +def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>; +def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>; +def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlwhex $RT, $RA, $RB", IIC_VecFP>; +def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlwhoux $RT, $RA, $RB", IIC_VecFP>; +def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlwhosx $RT, $RA, $RB", IIC_VecFP>; +def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>; +def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>; + +def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergehi $RT, $RA, $RB", IIC_VecFP>; +def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergelo $RT, $RA, $RB", IIC_VecFP>; +def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergehilo $RT, $RA, $RB", IIC_VecFP>; +def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergelohi $RT, $RA, $RB", IIC_VecFP>; + +def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>; +def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegsmian $RT, $RA, $RB", IIC_VecFP>; +def 
EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhegumian $RT, $RA, $RB", IIC_VecFP>; + +def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmf $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmfa $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmi $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmia $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhesmianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessf $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessfa $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessfanw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhessianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheumi $RT, $RA, 
$RB", IIC_VecFP>; +def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheumia $RT, $RA, $RB", IIC_VecFP>; +def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheumianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmheusianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGSMFAA : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogsmian $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhogumian $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmf $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmfa $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmi $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, 
gprc:$RB), + "evmhosmia $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhosmianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossf $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossfa $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossfanw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhossianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhoumi $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhoumia $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhoumianw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmhousianw $RT, $RA, $RB", IIC_VecFP>; + + +def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhsmf $RT, $RA, $RB", IIC_VecFP>; +def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>; +def EVMWHSMI : EVXForm_1<1101, (outs 
gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhsmi $RT, $RA, $RB", IIC_VecFP>; +def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhsmia $RT, $RA, $RB", IIC_VecFP>; +def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhssf $RT, $RA, $RB", IIC_VecFP>; +def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhssfa $RT, $RA, $RB", IIC_VecFP>; +def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhumi $RT, $RA, $RB", IIC_VecFP>; +def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwhumia $RT, $RA, $RB", IIC_VecFP>; +def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlssianw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlumi $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlumia $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlumianw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>; +def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwlusianw $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmf $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMFA : 
EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmfa $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmfan $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmi $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmia $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwsmian $RT, $RA, $RB", IIC_VecFP>; +def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwssf $RT, $RA, $RB", IIC_VecFP>; +def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwssfa $RT, $RA, $RB", IIC_VecFP>; +def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwssfaa $RT, $RA, $RB", IIC_VecFP>; +def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwssfan $RT, $RA, $RB", IIC_VecFP>; +def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwumi $RT, $RA, $RB", IIC_VecFP>; +def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwumia $RT, $RA, $RB", IIC_VecFP>; +def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwumiaa $RT, $RA, $RB", IIC_VecFP>; +def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evmwumian $RT, $RA, $RB", IIC_VecFP>; + + +def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evnand $RT, $RA, $RB", IIC_VecFP>; + +def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA), + "evneg $RT, $RA", IIC_VecFP>; + +def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, 
gprc:$RB), + "evnor $RT, $RA, $RB", IIC_VecFP>; +def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evor $RT, $RA, $RB", IIC_VecFP>; +def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evorc $RT, $RA, $RB", IIC_VecFP>; + +def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), + "evrlwi $RT, $RA, $RB", IIC_VecFP>; +def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evrlw $RT, $RA, $RB", IIC_VecFP>; + +def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA), + "evrndw $RT, $RA", IIC_VecFP>; + +def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), + "evslwi $RT, $RA, $RB", IIC_VecFP>; +def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evslw $RT, $RA, $RB", IIC_VecFP>; + +def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA), + "evsplatfi $RT, $RA", IIC_VecFP>; +def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA), + "evsplati $RT, $RA", IIC_VecFP>; + +def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), + "evsrwis $RT, $RA, $RB", IIC_VecFP>; +def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB), + "evsrwiu $RT, $RA, $RB", IIC_VecFP>; +def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evsrws $RT, $RA, $RB", IIC_VecFP>; +def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evsrwu $RT, $RA, $RB", IIC_VecFP>; + +def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstddx $RT, $RA, $RB", IIC_VecFP>; +def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstdhx $RT, $RA, $RB", IIC_VecFP>; +def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstdwx $RT, $RA, $RB", IIC_VecFP>; +def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstwhex $RT, $RA, $RB", IIC_VecFP>; +def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstwhox 
$RT, $RA, $RB", IIC_VecFP>; +def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstwwex $RT, $RA, $RB", IIC_VecFP>; +def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB), + "evstwwox $RT, $RA, $RB", IIC_VecFP>; + +def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA), + "evsubfssiaaw $RT, $RA", IIC_VecFP>; +def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA), + "evsubfsmiaaw $RT, $RA", IIC_VecFP>; +def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA), + "evsubfumiaaw $RT, $RA", IIC_VecFP>; +def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA), + "evsubfusiaaw $RT, $RA", IIC_VecFP>; +def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evsubfw $RT, $RA, $RB", IIC_VecFP>; +def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB), + "evsubifw $RT, $RA, $RB", IIC_VecFP>; +def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "evxor $RT, $RA, $RB", IIC_VecFP>; + +} // HasSPE diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 49bcc48..2c8f998 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -47,23 +47,24 @@ let Uses = [RM] in { // Load indexed instructions let mayLoad = 1, canFoldAsLoad = 1 in { - def LXSDX : XForm_1<31, 588, + def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, [(set f64:$XT, (load xoaddr:$src))]>; - def LXVD2X : XForm_1<31, 844, + def LXVD2X : XX1Form<31, 844, (outs vsrc:$XT), (ins memrr:$src), "lxvd2x $XT, $src", IIC_LdStLFD, - [(set v2f64:$XT, (load xoaddr:$src))]>; + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; - def LXVDSX : XForm_1<31, 332, + def LXVDSX : XX1Form<31, 332, (outs vsrc:$XT), (ins memrr:$src), "lxvdsx $XT, $src", IIC_LdStLFD, []>; - def LXVW4X : XForm_1<31, 780, + def LXVW4X : XX1Form<31, 780, (outs vsrc:$XT), (ins memrr:$src), - "lxvw4x $XT, 
$src", IIC_LdStLFD, []>; + "lxvw4x $XT, $src", IIC_LdStLFD, + [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; } // Store indexed instructions @@ -76,11 +77,12 @@ let Uses = [RM] in { def STXVD2X : XX1Form<31, 972, (outs), (ins vsrc:$XT, memrr:$dst), "stxvd2x $XT, $dst", IIC_LdStSTFD, - [(store v2f64:$XT, xoaddr:$dst)]>; + [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>; def STXVW4X : XX1Form<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), - "stxvw4x $XT, $dst", IIC_LdStSTFD, []>; + "stxvw4x $XT, $dst", IIC_LdStSTFD, + [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>; } // Add/Mul Instructions @@ -641,24 +643,36 @@ let Uses = [RM] in { let isCommutable = 1 in { def XSMAXDP : XX3Form<60, 160, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), - "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + "xsmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>; def XSMINDP : XX3Form<60, 168, (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), - "xsmindp $XT, $XA, $XB", IIC_VecFP, []>; + "xsmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>; def XVMAXDP : XX3Form<60, 224, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + "xvmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>; def XVMINDP : XX3Form<60, 232, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmindp $XT, $XA, $XB", IIC_VecFP, []>; + "xvmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>; def XVMAXSP : XX3Form<60, 192, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>; + "xvmaxsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>; def XVMINSP : XX3Form<60, 200, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvminsp $XT, $XA, $XB", IIC_VecFP, []>; + "xvminsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvminsp vsrc:$XA, 
vsrc:$XB))]>; } // isCommutable } // Uses = [RM] @@ -715,6 +729,31 @@ let Uses = [RM] in { (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; } // neverHasSideEffects + +// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after +// instruction selection into a branch sequence. +let usesCustomInserter = 1, // Expanded after instruction selection. + PPC970_Single = 1 in { + + def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst), + (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), + "#SELECT_CC_VSRC", + []>; + def SELECT_VSRC: Pseudo<(outs vsrc:$dst), + (ins crbitrc:$cond, vsrc:$T, vsrc:$F), + "#SELECT_VSRC", + [(set v2f64:$dst, + (select i1:$cond, v2f64:$T, v2f64:$F))]>; + def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst), + (ins crrc:$cond, f8rc:$T, f8rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSFRC", + []>; + def SELECT_VSFRC: Pseudo<(outs f8rc:$dst), + (ins crbitrc:$cond, f8rc:$T, f8rc:$F), + "#SELECT_VSFRC", + [(set f64:$dst, + (select i1:$cond, f64:$T, f64:$F))]>; +} // usesCustomInserter } // AddedComplexity def : InstAlias<"xvmovdp $XT, $XB", @@ -811,6 +850,49 @@ def : Pat<(sext_inreg v2i64:$C, v2i32), def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), (XVCVSXWDP (XXSLDWI $C, $C, 1))>; +// Loads. +def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + +// Stores. +def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; + +// Selects. 
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), + (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), + (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +// Divides. 
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), + (XVDIVSP $A, $B)>; +def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), + (XVDIVDP $A, $B)>; + } // AddedComplexity } // HasVSX diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp deleted file mode 100644 index e5f113a..0000000 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ /dev/null @@ -1,482 +0,0 @@ -//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the JIT interfaces for the 32-bit PowerPC target. -// -//===----------------------------------------------------------------------===// - -#include "PPCJITInfo.h" -#include "PPCRelocations.h" -#include "PPCSubtarget.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "jit" - -static TargetJITInfo::JITCompilerFn JITCompilerFunction; - -PPCJITInfo::PPCJITInfo(PPCSubtarget &STI) - : Subtarget(STI), is64Bit(STI.isPPC64()) { - useGOT = 0; -} - -#define BUILD_ADDIS(RD,RS,IMM16) \ - ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) -#define BUILD_ORI(RD,RS,UIMM16) \ - ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) -#define BUILD_ORIS(RD,RS,UIMM16) \ - ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) -#define BUILD_RLDICR(RD,RS,SH,ME) \ - ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \ - (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1)) -#define BUILD_MTSPR(RS,SPR) \ - ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) -#define BUILD_BCCTRx(BO,BI,LINK) \ - ((19 << 26) | ((BO) << 21) | ((BI) << 
16) | (528 << 1) | ((LINK) & 1)) -#define BUILD_B(TARGET, LINK) \ - ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1)) - -// Pseudo-ops -#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) -#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6) -#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) -#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) - -static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ - intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2; - unsigned *AtI = (unsigned*)(intptr_t)At; - - if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? - AtI[0] = BUILD_B(Offset, isCall); // b/bl target - } else if (!is64Bit) { - AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address) - AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) - AtI[2] = BUILD_MTCTR(12); // mtctr r12 - AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl - } else { - AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address) - AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address) - AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32 - AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address) - AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) - AtI[5] = BUILD_MTCTR(12); // mtctr r12 - AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl - } -} - -extern "C" void PPC32CompilationCallback(); -extern "C" void PPC64CompilationCallback(); - -// The first clause of the preprocessor directive looks wrong, but it is -// necessary when compiling this code on non-PowerPC hosts. -#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__) -void PPC32CompilationCallback() { - llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!"); -} -#elif !defined(__ELF__) -// CompilationCallback stub - We can't use a C function with inline assembly in -// it, because we the prolog/epilog inserted by GCC won't work for us. 
Instead, -// write our own wrapper, which does things our way, so we have complete control -// over register saving and restoring. -asm( - ".text\n" - ".align 2\n" - ".globl _PPC32CompilationCallback\n" -"_PPC32CompilationCallback:\n" - // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the - // FIXME: need to save v[0-19] for altivec? - // FIXME: could shrink frame - // Set up a proper stack frame - // FIXME Layout - // PowerPC32 ABI linkage - 24 bytes - // parameters - 32 bytes - // 13 double registers - 104 bytes - // 8 int registers - 32 bytes - "mflr r0\n" - "stw r0, 8(r1)\n" - "stwu r1, -208(r1)\n" - // Save all int arg registers - "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" - "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" - "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" - "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" - // Save all call-clobbered FP regs. - "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" - "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" - "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" - "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" - "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" - "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" - "stfd f1, 72(r1)\n" - // Arguments to Compilation Callback: - // r3 - our lr (address of the call instruction in stub plus 4) - // r4 - stub's lr (address of instruction that called the stub plus 4) - // r5 - is64Bit - always 0. 
- "mr r3, r0\n" - "lwz r2, 208(r1)\n" // stub's frame - "lwz r4, 8(r2)\n" // stub's lr - "li r5, 0\n" // 0 == 32 bit - "bl _LLVMPPCCompilationCallback\n" - "mtctr r3\n" - // Restore all int arg registers - "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" - "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" - "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" - "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" - // Restore all FP arg registers - "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" - "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" - "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" - "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" - "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" - "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" - "lfd f1, 72(r1)\n" - // Pop 3 frames off the stack and branch to target - "lwz r1, 208(r1)\n" - "lwz r2, 8(r1)\n" - "mtlr r2\n" - "bctr\n" - ); - -#else -// ELF PPC 32 support - -// CompilationCallback stub - We can't use a C function with inline assembly in -// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, -// write our own wrapper, which does things our way, so we have complete control -// over register saving and restoring. -asm( - ".text\n" - ".align 2\n" - ".globl PPC32CompilationCallback\n" -"PPC32CompilationCallback:\n" - // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the - // FIXME: need to save v[0-19] for altivec? - // FIXME: could shrink frame - // Set up a proper stack frame - // FIXME Layout - // 8 double registers - 64 bytes - // 8 int registers - 32 bytes - "mflr 0\n" - "stw 0, 4(1)\n" - "stwu 1, -104(1)\n" - // Save all int arg registers - "stw 10, 100(1)\n" "stw 9, 96(1)\n" - "stw 8, 92(1)\n" "stw 7, 88(1)\n" - "stw 6, 84(1)\n" "stw 5, 80(1)\n" - "stw 4, 76(1)\n" "stw 3, 72(1)\n" - // Save all call-clobbered FP regs. 
- "stfd 8, 64(1)\n" - "stfd 7, 56(1)\n" "stfd 6, 48(1)\n" - "stfd 5, 40(1)\n" "stfd 4, 32(1)\n" - "stfd 3, 24(1)\n" "stfd 2, 16(1)\n" - "stfd 1, 8(1)\n" - // Arguments to Compilation Callback: - // r3 - our lr (address of the call instruction in stub plus 4) - // r4 - stub's lr (address of instruction that called the stub plus 4) - // r5 - is64Bit - always 0. - "mr 3, 0\n" - "lwz 5, 104(1)\n" // stub's frame - "lwz 4, 4(5)\n" // stub's lr - "li 5, 0\n" // 0 == 32 bit - "bl LLVMPPCCompilationCallback\n" - "mtctr 3\n" - // Restore all int arg registers - "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" - "lwz 8, 92(1)\n" "lwz 7, 88(1)\n" - "lwz 6, 84(1)\n" "lwz 5, 80(1)\n" - "lwz 4, 76(1)\n" "lwz 3, 72(1)\n" - // Restore all FP arg registers - "lfd 8, 64(1)\n" - "lfd 7, 56(1)\n" "lfd 6, 48(1)\n" - "lfd 5, 40(1)\n" "lfd 4, 32(1)\n" - "lfd 3, 24(1)\n" "lfd 2, 16(1)\n" - "lfd 1, 8(1)\n" - // Pop 3 frames off the stack and branch to target - "lwz 1, 104(1)\n" - "lwz 0, 4(1)\n" - "mtlr 0\n" - "bctr\n" - ); -#endif - -#if !defined(__powerpc64__) && !defined(__ppc64__) -void PPC64CompilationCallback() { - llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!"); -} -#else -# ifdef __ELF__ -asm( - ".text\n" - ".align 2\n" - ".globl PPC64CompilationCallback\n" -#if _CALL_ELF == 2 - ".type PPC64CompilationCallback,@function\n" -"PPC64CompilationCallback:\n" -#else - ".section \".opd\",\"aw\",@progbits\n" - ".align 3\n" -"PPC64CompilationCallback:\n" - ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" - ".size PPC64CompilationCallback,24\n" - ".previous\n" - ".align 4\n" - ".type PPC64CompilationCallback,@function\n" -".L.PPC64CompilationCallback:\n" -#endif -# else -asm( - ".text\n" - ".align 2\n" - ".globl _PPC64CompilationCallback\n" -"_PPC64CompilationCallback:\n" -# endif - // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the - // FIXME: need to save v[0-19] for altivec? 
- // Set up a proper stack frame - // Layout - // PowerPC64 ABI linkage - 48 bytes - // parameters - 64 bytes - // 13 double registers - 104 bytes - // 8 int registers - 64 bytes - "mflr 0\n" - "std 0, 16(1)\n" - "stdu 1, -280(1)\n" - // Save all int arg registers - "std 10, 272(1)\n" "std 9, 264(1)\n" - "std 8, 256(1)\n" "std 7, 248(1)\n" - "std 6, 240(1)\n" "std 5, 232(1)\n" - "std 4, 224(1)\n" "std 3, 216(1)\n" - // Save all call-clobbered FP regs. - "stfd 13, 208(1)\n" "stfd 12, 200(1)\n" - "stfd 11, 192(1)\n" "stfd 10, 184(1)\n" - "stfd 9, 176(1)\n" "stfd 8, 168(1)\n" - "stfd 7, 160(1)\n" "stfd 6, 152(1)\n" - "stfd 5, 144(1)\n" "stfd 4, 136(1)\n" - "stfd 3, 128(1)\n" "stfd 2, 120(1)\n" - "stfd 1, 112(1)\n" - // Arguments to Compilation Callback: - // r3 - our lr (address of the call instruction in stub plus 4) - // r4 - stub's lr (address of instruction that called the stub plus 4) - // r5 - is64Bit - always 1. - "mr 3, 0\n" // return address (still in r0) - "ld 5, 280(1)\n" // stub's frame - "ld 4, 16(5)\n" // stub's lr - "li 5, 1\n" // 1 == 64 bit -# ifdef __ELF__ - "bl LLVMPPCCompilationCallback\n" - "nop\n" -# else - "bl _LLVMPPCCompilationCallback\n" -# endif - "mtctr 3\n" - // Restore all int arg registers - "ld 10, 272(1)\n" "ld 9, 264(1)\n" - "ld 8, 256(1)\n" "ld 7, 248(1)\n" - "ld 6, 240(1)\n" "ld 5, 232(1)\n" - "ld 4, 224(1)\n" "ld 3, 216(1)\n" - // Restore all FP arg registers - "lfd 13, 208(1)\n" "lfd 12, 200(1)\n" - "lfd 11, 192(1)\n" "lfd 10, 184(1)\n" - "lfd 9, 176(1)\n" "lfd 8, 168(1)\n" - "lfd 7, 160(1)\n" "lfd 6, 152(1)\n" - "lfd 5, 144(1)\n" "lfd 4, 136(1)\n" - "lfd 3, 128(1)\n" "lfd 2, 120(1)\n" - "lfd 1, 112(1)\n" - // Pop 3 frames off the stack and branch to target - "ld 1, 280(1)\n" - "ld 0, 16(1)\n" - "mtlr 0\n" - // XXX: any special TOC handling in the ELF case for JIT? 
- "bctr\n" - ); -#endif - -extern "C" { -LLVM_LIBRARY_VISIBILITY void * -LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { - // Adjust the pointer to the address of the call instruction in the stub - // emitted by emitFunctionStub, rather than the instruction after it. - unsigned *StubCallAddr = StubCallAddrPlus4 - 1; - unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; - - void *Target = JITCompilerFunction(StubCallAddr); - - // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite - // it to branch directly to the destination. If so, rewrite it so it does not - // need to go through the stub anymore. - unsigned OrigCallInst = *OrigCallAddr; - if ((OrigCallInst >> 26) == 18) { // Direct call. - intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; - - if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? - // Clear the original target out. - OrigCallInst &= (63 << 26) | 3; - // Fill in the new target. - OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; - // Replace the call. - *OrigCallAddr = OrigCallInst; - } - } - - // Assert that we are coming from a stub that was created with our - // emitFunctionStub. - if ((*StubCallAddr >> 26) == 18) - StubCallAddr -= 3; - else { - assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); - StubCallAddr -= is64Bit ? 9 : 6; - } - - // Rewrite the stub with an unconditional branch to the target, for any users - // who took the address of the stub. - EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); - sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); - - // Put the address of the target function to call and the address to return to - // after calling the target function in a place that is easy to get on the - // stack after we restore all regs. 
- return Target; -} -} - - - -TargetJITInfo::LazyResolverFn -PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { - JITCompilerFunction = Fn; - return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; -} - -TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { - // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 - // instructions to save the caller's address if this is a lazy-compilation - // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address - // into a register and jump through it. - StubLayout Result = {10*4, 4}; - return Result; -} - -#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ -defined(__APPLE__) -extern "C" void sys_icache_invalidate(const void *Addr, size_t len); -#endif - -void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { - // If this is just a call to an external function, emit a branch instead of a - // call. The code is the same except for one bit of the last instruction. 
- if (Fn != (void*)(intptr_t)PPC32CompilationCallback && - Fn != (void*)(intptr_t)PPC64CompilationCallback) { - void *Addr = (void*)JCE.getCurrentPCValue(); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); - sys::Memory::InvalidateInstructionCache(Addr, 7*4); - return Addr; - } - - void *Addr = (void*)JCE.getCurrentPCValue(); - if (is64Bit) { - JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) - JCE.emitWordBE(0x7d6802a6); // mflr r11 - JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (Subtarget.isDarwinABI()){ - JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) - JCE.emitWordBE(0x7d6802a6); // mflr r11 - JCE.emitWordBE(0x91610028); // stw r11, 40(r1) - } else { - JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) - JCE.emitWordBE(0x7d6802a6); // mflr r11 - JCE.emitWordBE(0x91610024); // stw r11, 36(r1) - } - intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue(); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - JCE.emitWordBE(0); - EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); - sys::Memory::InvalidateInstructionCache(Addr, 10*4); - return Addr; -} - - -void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { - for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { - unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; - intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); - switch ((PPC::RelocationType)MR->getRelocationType()) { - default: llvm_unreachable("Unknown relocation type!"); - case PPC::reloc_pcrel_bx: - // PC-relative relocation for b and bl instructions. 
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; - assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && - "Relocation out of range!"); - *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; - break; - case PPC::reloc_pcrel_bcx: - // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other - // bcx instructions. - ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; - assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) && - "Relocation out of range!"); - *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2; - break; - case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr - case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr - ResultPtr += MR->getConstantVal(); - - // If this is a high-part access, get the high-part. - if (MR->getRelocationType() == PPC::reloc_absolute_high) { - // If the low part will have a carry (really a borrow) from the low - // 16-bits into the high 16, add a bit to borrow from. - if (((int)ResultPtr << 16) < 0) - ResultPtr += 1 << 16; - ResultPtr >>= 16; - } - - // Do the addition then mask, so the addition does not overflow the 16-bit - // immediate section of the instruction. - unsigned LowBits = (*RelocPos + ResultPtr) & 65535; - unsigned HighBits = *RelocPos & ~65535; - *RelocPos = LowBits | HighBits; // Slam into low 16-bits - break; - } - case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr - ResultPtr += MR->getConstantVal(); - // Do the addition then mask, so the addition does not overflow the 16-bit - // immediate section of the instruction. - unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; - unsigned HighBits = *RelocPos & 0xFFFF0003; - *RelocPos = LowBits | HighBits; // Slam into low 14-bits. 
- break; - } - } - } -} - -void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); - sys::Memory::InvalidateInstructionCache(Old, 7*4); -} diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h deleted file mode 100644 index b6b37ff..0000000 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ /dev/null @@ -1,46 +0,0 @@ -//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PowerPC implementation of the TargetJITInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef POWERPC_JITINFO_H -#define POWERPC_JITINFO_H - -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Target/TargetJITInfo.h" - -namespace llvm { -class PPCSubtarget; -class PPCJITInfo : public TargetJITInfo { -protected: - PPCSubtarget &Subtarget; - bool is64Bit; - -public: - PPCJITInfo(PPCSubtarget &STI); - - StubLayout getStubLayout() override; - void *emitFunctionStub(const Function *F, void *Fn, - JITCodeEmitter &JCE) override; - LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; - void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, - unsigned char *GOTBase) override; - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. 
- /// - void replaceMachineCodeForFunction(void *Old, void *New) override; -}; -} - -#endif diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f8e84a5..880b520 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCSubtarget.h" #include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" @@ -37,14 +38,16 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ const TargetMachine &TM = AP.TM; Mangler *Mang = AP.Mang; - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); MCContext &Ctx = AP.OutContext; + bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin(); SmallString<128> Name; StringRef Suffix; - if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) - Suffix = "$stub"; - else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) { + if (isDarwin) + Suffix = "$stub"; + } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) Suffix = "$non_lazy_ptr"; if (!Suffix.empty()) @@ -68,7 +71,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. 
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) { MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -134,8 +137,17 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, case PPCII::MO_TLS: RefKind = MCSymbolRefExpr::VK_PPC_TLS; break; + case PPCII::MO_TLSGD: + RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; + break; + case PPCII::MO_TLSLD: + RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; + break; } + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) + RefKind = MCSymbolRefExpr::VK_PLT; + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 6a0aec8..4aff95a 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -8,8 +8,17 @@ //===----------------------------------------------------------------------===// #include "PPCMachineFunctionInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; void PPCFunctionInfo::anchor() { } +MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { + const DataLayout *DL = MF.getSubtarget().getDataLayout(); + return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ + Twine(MF.getFunctionNumber())+"$poff"); +} diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 33f843d..83de799 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPC_MACHINE_FUNCTION_INFO_H -#define PPC_MACHINE_FUNCTION_INFO_H +#ifndef 
LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H #include "llvm/CodeGen/MachineFunction.h" @@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// 64-bit SVR4 ABI. SmallVector<unsigned, 3> MustSaveCRs; + /// Hold onto our MachineFunction context. + MachineFunction &MF; + + /// Whether this uses the PIC Base register or not. + bool UsesPICBase; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -109,7 +115,9 @@ public: VarArgsStackOffset(0), VarArgsNumGPR(0), VarArgsNumFPR(0), - CRSpillFrameIndex(0) {} + CRSpillFrameIndex(0), + MF(MF), + UsesPICBase(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -170,6 +178,11 @@ public: const SmallVectorImpl<unsigned> & getMustSaveCRs() const { return MustSaveCRs; } void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); } + + void setUsesPICBase(bool uses) { UsesPICBase = uses; } + bool usesPICBase() const { return UsesPICBase; } + + MCSymbol *getPICOffsetSymbol() const; }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h index 17b836d..8a1d680 100644 --- a/lib/Target/PowerPC/PPCPerfectShuffle.h +++ b/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H +#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H + // 31 entries have cost 0 // 292 entries have cost 1 // 1384 entries have cost 2 @@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = { 835584U, // <u,u,u,u>: Cost 0 copy LHS 0 }; + +#endif diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index eca774e..9b9966f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ 
b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -140,8 +140,8 @@ PPCRegisterInfo::getNoPreservedMask() const { BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>( + MF.getSubtarget().getFrameLowering()); // The ZERO register is not really a register, but the representation of r0 // when used in instructions that treat r0 as the constant 0. @@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (PPCFI->needsFP(MF)) Reserved.set(PPC::R31); - if (hasBasePointer(MF)) + if (hasBasePointer(MF)) { + if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) + Reserved.set(PPC::R29); + else + Reserved.set(PPC::R30); + } + + if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -214,7 +223,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const unsigned DefaultSafety = 1; switch (RC->getID()) { @@ -278,7 +287,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the instruction info. - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Determine whether 64-bit pointers are used. 
bool LP64 = Subtarget.isPPC64(); DebugLoc dl = MI.getDebugLoc(); @@ -289,7 +298,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned FrameSize = MFI->getStackSize(); // Get stack alignments. - unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned TargetAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); assert((maxCallFrameSize & (MaxAlign-1)) == 0 && "Maximum call-frame size not sufficiently aligned"); @@ -394,7 +406,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -438,7 +450,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -511,7 +523,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -554,7 +566,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. 
MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool LP64 = Subtarget.isPPC64(); @@ -601,7 +613,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -626,7 +638,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; @@ -706,7 +718,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Get the basic block's function. MachineFunction &MF = *MBB.getParent(); // Get the instruction info. - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); DebugLoc dl = MI.getDebugLoc(); @@ -831,7 +843,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (!Subtarget.isPPC64()) return TFI->hasFP(MF) ? 
PPC::R31 : PPC::R1; @@ -843,7 +855,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { if (!hasBasePointer(MF)) return getFrameRegister(MF); - return Subtarget.isPPC64() ? PPC::X30 : PPC::R30; + if (Subtarget.isPPC64()) + return PPC::X30; + + if (Subtarget.isSVR4ABI() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) + return PPC::R29; + + return PPC::R30; } bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { @@ -868,7 +887,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -885,16 +907,6 @@ bool PPCRegisterInfo:: needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { assert(Offset < 0 && "Local offset must be negative"); - unsigned FIOperandNum = 0; - while (!MI->getOperand(FIOperandNum).isFI()) { - ++FIOperandNum; - assert(FIOperandNum < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); - Offset += MI->getOperand(OffsetOperandNo).getImm(); - // It's the load/store FI references that cause issues, as it can be difficult // to materialize the offset if it won't fit in the literal field. 
Estimate // based on the size of the local frame and some conservative assumptions @@ -916,8 +928,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>( + MF.getSubtarget().getFrameLowering()); unsigned StackEst = PPCFI->determineFrameLayout(MF, false, true); @@ -951,7 +963,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); const MachineFunction &MF = *MBB->getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); @@ -976,7 +988,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = MI.getDesc(); MachineRegisterInfo &MRI = MF.getRegInfo(); MRI.constrainRegClass(BaseReg, @@ -985,6 +997,16 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { + unsigned FIOperandNum = 0; + while (!MI->getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); + Offset += MI->getOperand(OffsetOperandNo).getImm(); + return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm 
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 13a35f6..c182f95 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC32_REGISTERINFO_H -#define POWERPC32_REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H +#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H #include "PPC.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h deleted file mode 100644 index 0b392f9..0000000 --- a/lib/Target/PowerPC/PPCRelocations.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PowerPC 32-bit target-specific relocation types. -// -//===----------------------------------------------------------------------===// - -#ifndef PPCRELOCATIONS_H -#define PPCRELOCATIONS_H - -#include "llvm/CodeGen/MachineRelocation.h" - -// Hack to rid us of a PPC pre-processor symbol which is erroneously -// defined in a PowerPC header file (bug in Linux/PPC) -#ifdef PPC -#undef PPC -#endif - -namespace llvm { - namespace PPC { - enum RelocationType { - // reloc_vanilla - A standard relocation, where the address of the - // relocated object completely overwrites the address of the relocation. - reloc_vanilla, - - // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions. - reloc_pcrel_bx, - - // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE, - // and other bcx instructions. 
- reloc_pcrel_bcx, - - // reloc_absolute_high - Absolute relocation, for the loadhi instruction - // (which is really addis). Add the high 16-bits of the specified global - // address into the low 16-bits of the instruction. - reloc_absolute_high, - - // reloc_absolute_low - Absolute relocation, for the la instruction (which - // is really an addi). Add the low 16-bits of the specified global - // address into the low 16-bits of the instruction. - reloc_absolute_low, - - // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store - // instruction which have two implicit zero bits. - reloc_absolute_low_ix - }; - } -} - -#endif diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 1221d41..7f80121 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -106,6 +106,7 @@ def IIC_SprSLBIE : InstrItinClass; def IIC_SprSLBMTE : InstrItinClass; def IIC_SprSLBMFEE : InstrItinClass; def IIC_SprSLBIA : InstrItinClass; +def IIC_SprTLBIA : InstrItinClass; def IIC_SprTLBIEL : InstrItinClass; def IIC_SprTLBIE : InstrItinClass; diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h index b2e7f3b..2c1378d 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPCCSELECTIONDAGINFO_H -#define POWERPCCSELECTIONDAGINFO_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H #include "llvm/Target/TargetSelectionDAGInfo.h" diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 2e1b74a..04e7ec6 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -33,13 +33,12 @@ using namespace llvm; #include "PPCGenSubtargetInfo.inc" /// Return the datalayout string of a subtarget. 
-static std::string getDataLayoutString(const PPCSubtarget &ST) { - const Triple &T = ST.getTargetTriple(); - +static std::string getDataLayoutString(const Triple &T) { + bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; std::string Ret; // Most PPC* platforms are big endian, PPC64LE is little endian. - if (ST.isLittleEndian()) + if (T.getArch() == Triple::ppc64le) Ret = "e"; else Ret = "E"; @@ -48,18 +47,18 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) { // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit // pointers. - if (!ST.isPPC64() || T.getOS() == Triple::Lv2) + if (!is64Bit || T.getOS() == Triple::Lv2) Ret += "-p:32:32"; // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). - if (ST.isPPC64() || ST.isSVR4ABI()) + if (is64Bit || !T.isOSDarwin()) Ret += "-i64:64"; else Ret += "-f64:32:64"; // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
- if (ST.isPPC64()) + if (is64Bit) Ret += "-n32:64"; else Ret += "-n32"; @@ -70,47 +69,20 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) { PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + initSubtargetFeatures(CPU, FS); return *this; } PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, PPCTargetMachine &TM, - bool is64Bit, CodeGenOpt::Level OptLevel) - : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), - OptLevel(OptLevel), - FrameLowering(initializeSubtargetDependencies(CPU, FS)), - DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this), + const std::string &FS, const PPCTargetMachine &TM) + : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + DL(getDataLayoutString(TargetTriple)), + IsPPC64(TargetTriple.getArch() == Triple::ppc64 || + TargetTriple.getArch() == Triple::ppc64le), + TargetABI(PPC_ABI_UNKNOWN), + FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this), TLInfo(TM), TSInfo(&DL) {} -/// SetJITMode - This is called to inform the subtarget info that we are -/// producing code for the JIT. -void PPCSubtarget::SetJITMode() { - // JIT mode doesn't want lazy resolver stubs, it knows exactly where - // everything is. This matters for PPC64, which codegens in PIC mode without - // stubs. - HasLazyResolverStubs = false; - - // Calls to external functions need to use indirect calls - IsJITCodeModel = true; -} - -void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { - AttributeSet FnAttrs = MF->getFunction()->getAttributes(); - Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-cpu"); - Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-features"); - std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ? 
CPUAttr.getValueAsString() : ""; - std::string FS = - !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); - } -} - void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; DarwinDirective = PPC::DIR_NONE; @@ -119,8 +91,10 @@ void PPCSubtarget::initializeEnvironment() { Use64BitRegs = false; UseCRBits = false; HasAltivec = false; + HasSPE = false; HasQPX = false; HasVSX = false; + HasP8Vector = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -136,13 +110,16 @@ void PPCSubtarget::initializeEnvironment() { HasPOPCNTD = false; HasLDBRX = false; IsBookE = false; + HasOnlyMSYNC = false; + IsPPC4xx = false; + IsPPC6xx = false; + IsE500 = false; DeprecatedMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; - IsJITCodeModel = false; } -void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { +void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty()) @@ -156,35 +133,13 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); - // Make sure 64-bit features are available when CPUname is generic - std::string FullFS = FS; - - // If we are generating code for ppc64, verify that options make sense. - if (IsPPC64) { - Has64BitSupport = true; - // Silently force 64-bit register use on ppc64. - Use64BitRegs = true; - if (!FullFS.empty()) - FullFS = "+64bit," + FullFS; - else - FullFS = "+64bit"; - } - - // At -O2 and above, track CR bits as individual registers. - if (OptLevel >= CodeGenOpt::Default) { - if (!FullFS.empty()) - FullFS = "+crbits," + FullFS; - else - FullFS = "+crbits"; - } - // Parse features string. 
- ParseSubtargetFeatures(CPUName, FullFS); + ParseSubtargetFeatures(CPUName, FS); // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. - if (use64BitRegs() && !has64BitSupport()) - Use64BitRegs = false; + if (IsPPC64 && has64BitSupport()) + Use64BitRegs = true; // Set up darwin-specific properties. if (isDarwin()) @@ -201,8 +156,20 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // FIXME: For now, we disable VSX in little-endian mode until endian // issues in those instructions can be addressed. - if (IsLittleEndian) + if (IsLittleEndian) { HasVSX = false; + HasP8Vector = false; + } + + // Determine default ABI. + if (TargetABI == PPC_ABI_UNKNOWN) { + if (!isDarwin() && IsPPC64) { + if (IsLittleEndian) + TargetABI = PPC_ABI_ELFv2; + else + TargetABI = PPC_ABI_ELFv1; + } + } } /// hasLazyResolverStub - Return true if accesses to the specified global have @@ -213,31 +180,13 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, // We never have stubs if HasLazyResolverStubs=false or if in static mode. if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static) return false; - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. 
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + bool isDecl = GV->isDeclaration(); if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage()) return false; return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage() || isDecl; } -bool PPCSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_ALL; - - CriticalPathRCs.clear(); - - if (isPPC64()) - CriticalPathRCs.push_back(&PPC::G8RCRegClass); - else - CriticalPathRCs.push_back(&PPC::GPRCRegClass); - - return OptLevel >= CodeGenOpt::Default; -} - // Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { @@ -259,6 +208,19 @@ bool PPCSubtarget::enableMachineScheduler() const { return needsAggressiveScheduling(DarwinDirective); } +// This overrides the PostRAScheduler bit in the SchedModel for each CPU. +bool PPCSubtarget::enablePostMachineScheduler() const { return true; } + +PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const { + return TargetSubtargetInfo::ANTIDEP_ALL; +} + +void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(isPPC64() ? 
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass); +} + void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 2a16699..1df19c3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -11,13 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPCSUBTARGET_H -#define POWERPCSUBTARGET_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H +#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" -#include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -66,6 +65,12 @@ class TargetMachine; class PPCSubtarget : public PPCGenSubtargetInfo { protected: + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; + + // Calculates type size & alignment + const DataLayout DL; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; @@ -83,8 +88,10 @@ protected: bool UseCRBits; bool IsPPC64; bool HasAltivec; + bool HasSPE; bool HasQPX; bool HasVSX; + bool HasP8Vector; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -97,22 +104,23 @@ protected: bool HasPOPCNTD; bool HasLDBRX; bool IsBookE; + bool HasOnlyMSYNC; + bool IsE500; + bool IsPPC4xx; + bool IsPPC6xx; bool DeprecatedMFTB; bool DeprecatedDST; bool HasLazyResolverStubs; - bool IsJITCodeModel; bool IsLittleEndian; - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - - /// OptLevel - What default optimization level we're emitting code for. 
- CodeGenOpt::Level OptLevel; + enum { + PPC_ABI_UNKNOWN, + PPC_ABI_ELFv1, + PPC_ABI_ELFv2 + } TargetABI; PPCFrameLowering FrameLowering; - const DataLayout DL; PPCInstrInfo InstrInfo; - PPCJITInfo JITInfo; PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; @@ -121,17 +129,12 @@ public: /// of the specified triple. /// PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, PPCTargetMachine &TM, bool is64Bit, - CodeGenOpt::Level OptLevel); + const std::string &FS, const PPCTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// SetJITMode - This is called to inform the subtarget info that we are - /// producing code for the JIT. - void SetJITMode(); - /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. @@ -143,24 +146,32 @@ public: /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. 
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - - const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; } - const DataLayout *getDataLayout() const { return &DL; } - const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } - PPCJITInfo *getJITInfo() { return &JITInfo; } - const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } - const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + const PPCFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const DataLayout *getDataLayout() const override { return &DL; } + const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const PPCTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const PPCSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const PPCRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); + } /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - /// \brief Reset the features for the PowerPC target. - void resetSubtargetFeatures(const MachineFunction *MF) override; private: void initializeEnvironment(); - void resetSubtargetFeatures(StringRef CPU, StringRef FS); + void initSubtargetFeatures(StringRef CPU, StringRef FS); public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. 
@@ -186,9 +197,6 @@ public: bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; - // isJITCodeModel - True if we're generating code for the JIT - bool isJITCodeModel() const { return IsJITCodeModel; } - // isLittleEndian - True if generating little-endian code bool isLittleEndian() const { return IsLittleEndian; } @@ -205,13 +213,19 @@ public: bool hasFPRND() const { return HasFPRND; } bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } + bool hasSPE() const { return HasSPE; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } + bool hasP8Vector() const { return HasP8Vector; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } + bool hasOnlyMSYNC() const { return HasOnlyMSYNC; } + bool isPPC4xx() const { return IsPPC4xx; } + bool isPPC6xx() const { return IsPPC6xx; } + bool isE500() const { return IsE500; } bool isDeprecatedMFTB() const { return DeprecatedMFTB; } bool isDeprecatedDST() const { return DeprecatedDST; } @@ -222,18 +236,22 @@ public: /// isBGQ - True if this is a BG/Q platform. bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } - - /// enablePostRAScheduler - True at 'More' optimization. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const override; + bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; } bool enableEarlyIfConversion() const override { return hasISEL(); } // Scheduling customization. 
bool enableMachineScheduler() const override; + // This overrides the PostRAScheduler bit in the SchedModel for each CPU. + bool enablePostMachineScheduler() const override; + AntiDepBreakMode getAntiDepBreakMode() const override; + void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; + void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 9563b90..f15189c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "PPCTargetMachine.h" +#include "PPCTargetObjectFile.h" #include "PPC.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCStreamer.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -37,15 +39,54 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); } +static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) { + std::string FullFS = FS; + Triple TargetTriple(TT); + + // Make sure 64-bit features are available when CPUname is generic + if (TargetTriple.getArch() == Triple::ppc64 || + TargetTriple.getArch() == Triple::ppc64le) { + if (!FullFS.empty()) + FullFS = "+64bit," + FullFS; + else + FullFS = "+64bit"; + } + + if (OL >= CodeGenOpt::Default) { + if (!FullFS.empty()) + FullFS = "+crbits," + FullFS; + else + FullFS = "+crbits"; + } + return FullFS; +} + +static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { + // If it isn't a Mach-O file then it's going to be a linux ELF + // object file. + if (TT.isOSDarwin()) + return make_unique<TargetLoweringObjectFileMachO>(); + + return make_unique<PPC64LinuxTargetObjectFile>(); +} + +// The FeatureString here is a little subtle. 
We are modifying the feature string +// with what are (currently) non-function specific overrides as it goes into the +// LLVMTargetMachine constructor and then using the stored value in the +// Subtarget constructor below it. PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, *this, is64Bit, OL) { + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, + CM, OL), + TLOF(createTLOF(Triple(getTargetTriple()))), + Subtarget(TT, CPU, TargetFS, *this) { initAsmInfo(); } +PPCTargetMachine::~PPCTargetMachine() {} + void PPC32TargetMachine::anchor() { } PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, @@ -53,7 +94,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { } void PPC64TargetMachine::anchor() { } @@ -63,9 +104,34 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { } +const PPCSubtarget * +PPCTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? 
CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this); + } + return I.get(); +} //===----------------------------------------------------------------------===// // Pass Pipeline Configuration @@ -86,6 +152,7 @@ public: return *getPPCTargetMachine().getSubtargetImpl(); } + void addIRPasses() override; bool addPreISel() override; bool addILPOpts() override; bool addInstSelector() override; @@ -99,6 +166,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { return new PPCPassConfig(this, PM); } +void PPCPassConfig::addIRPasses() { + addPass(createAtomicExpandPass(&getPPCTargetMachine())); + TargetPassConfig::addIRPasses(); +} + bool PPCPassConfig::addPreISel() { if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) addPass(createPPCCTRLoops(getPPCTargetMachine())); @@ -148,18 +220,6 @@ bool PPCPassConfig::addPreEmitPass() { return false; } -bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho - // writing? - Subtarget.SetJITMode(); - - // Machine code emitter pass for PowerPC. - PM.add(createPPCJITCodeEmitterPass(*this, JCE)); - - return false; -} - void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our PPC pass. 
This // allows the PPC pass to delegate to the target independent layer when diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 4c7029c..5095d73 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPC_TARGETMACHINE_H -#define PPC_TARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H +#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H #include "PPCInstrInfo.h" #include "PPCSubtarget.h" @@ -24,46 +24,30 @@ namespace llvm { /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. /// class PPCTargetMachine : public LLVMTargetMachine { - PPCSubtarget Subtarget; + std::unique_ptr<TargetLoweringObjectFile> TLOF; + PPCSubtarget Subtarget; + + mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool is64Bit); + CodeGenOpt::Level OL); - const PPCInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const PPCFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } - const PPCTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } - const PPCSelectionDAGInfo* getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } - const PPCRegisterInfo *getRegisterInfo() const override { - return &getInstrInfo()->getRegisterInfo(); - } + ~PPCTargetMachine() override; - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } - const PPCSubtarget *getSubtargetImpl() const override { return 
&Subtarget; } - const InstrItineraryData *getInstrItineraryData() const override { - return &getSubtargetImpl()->getInstrItineraryData(); - } + const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const PPCSubtarget *getSubtargetImpl(const Function &F) const override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) override; /// \brief Register PPC analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } }; /// PPC32TargetMachine - PowerPC 32-bit target machine. diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h index 3e71bbc..cd84da2 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.h +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H -#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 74b5f45..6493713 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPCTARGETSTREAMER_H -#define PPCTARGETSTREAMER_H +#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H +#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H #include "llvm/MC/MCStreamer.h" @@ -19,6 +19,8 @@ public: virtual ~PPCTargetStreamer(); virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void 
emitMachine(StringRef CPU) = 0; + virtual void emitAbiVersion(int AbiVersion) = 0; + virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0; }; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 007901b..37624ed 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -38,6 +38,7 @@ void initializePPCTTIPass(PassRegistry &); namespace { class PPCTTI final : public ImmutablePass, public TargetTransformInfo { + const TargetMachine *TM; const PPCSubtarget *ST; const PPCTargetLowering *TLI; @@ -47,16 +48,16 @@ public: } PPCTTI(const PPCTargetMachine *TM) - : ImmutablePass(ID), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializePPCTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() override { + void initializePass() override { pushTTIStack(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -64,7 +65,7 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. 
- virtual void *getAdjustedAnalysisPointer(const void *ID) override { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; @@ -79,33 +80,31 @@ public: unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override; - virtual PopcntSupportKind - getPopcntSupport(unsigned TyWidth) const override; - virtual void getUnrollingPreferences( - Loop *L, UnrollingPreferences &UP) const override; + PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; + void getUnrollingPreferences(const Function *F, Loop *L, + UnrollingPreferences &UP) const override; /// @} /// \name Vector TTI Implementations /// @{ - virtual unsigned getNumberOfRegisters(bool Vector) const override; - virtual unsigned getRegisterBitWidth(bool Vector) const override; - virtual unsigned getMaximumUnrollFactor() const override; - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, - OperandValueKind) const override; - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const override; - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override; - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const override; - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const override; - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const override; + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getMaxInterleaveFactor() const override; + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind, OperandValueProperties, + OperandValueProperties) const override; + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type 
*SubTp) const override; + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const override; + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const override; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const override; + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const override; /// @} }; @@ -271,8 +270,9 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return PPCTTI::getIntImmCost(Imm, Ty); } -void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { - if (ST->getDarwinDirective() == PPC::DIR_A2) { +void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L, + UnrollingPreferences &UP) const { + if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling // helps expose latency-hiding opportunities to the instruction scheduler. UP.Partial = UP.Runtime = true; @@ -297,7 +297,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { } -unsigned PPCTTI::getMaximumUnrollFactor() const { +unsigned PPCTTI::getMaxInterleaveFactor() const { unsigned Directive = ST->getDarwinDirective(); // The 440 has no SIMD support, but floating-point instructions // have a 5-cycle latency, so unroll by 5x for latency hiding. @@ -318,14 +318,15 @@ unsigned PPCTTI::getMaximumUnrollFactor() const { return 2; } -unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info, - OperandValueKind Op2Info) const { +unsigned PPCTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. 
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, - Op2Info); + return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); } unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, |