From 36b56886974eae4f9c5ebc96befd3e7bfe5de338 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Wed, 23 Apr 2014 16:57:46 -0700 Subject: Update to LLVM 3.5a. Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617 --- lib/Target/PowerPC/AsmParser/CMakeLists.txt | 5 - lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 265 +++- lib/Target/PowerPC/CMakeLists.txt | 4 +- lib/Target/PowerPC/Disassembler/CMakeLists.txt | 3 + lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 23 + lib/Target/PowerPC/Disassembler/Makefile | 16 + .../PowerPC/Disassembler/PPCDisassembler.cpp | 345 ++++++ lib/Target/PowerPC/InstPrinter/CMakeLists.txt | 4 - lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 18 +- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2 +- lib/Target/PowerPC/LLVMBuild.txt | 6 +- lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 2 - lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 51 +- .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 100 +- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 23 +- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 5 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 144 ++- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 7 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 2 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 62 +- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 1 + lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 8 + lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 7 +- lib/Target/PowerPC/Makefile | 5 +- lib/Target/PowerPC/PPC.h | 7 + lib/Target/PowerPC/PPC.td | 49 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 184 ++- lib/Target/PowerPC/PPCBranchSelector.cpp | 9 + lib/Target/PowerPC/PPCCTRLoops.cpp | 42 +- lib/Target/PowerPC/PPCCallingConv.td | 20 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 3 +- lib/Target/PowerPC/PPCFastISel.cpp | 174 +-- lib/Target/PowerPC/PPCFrameLowering.cpp | 56 +- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 208 +++- lib/Target/PowerPC/PPCHazardRecognizers.h | 21 +- 
lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 699 ++++++++++- lib/Target/PowerPC/PPCISelLowering.cpp | 1065 +++++++++++++++- lib/Target/PowerPC/PPCISelLowering.h | 27 +- lib/Target/PowerPC/PPCInstr64Bit.td | 430 ++++--- lib/Target/PowerPC/PPCInstrAltivec.td | 174 +-- lib/Target/PowerPC/PPCInstrFormats.td | 205 +++- lib/Target/PowerPC/PPCInstrInfo.cpp | 907 ++++++++++++-- lib/Target/PowerPC/PPCInstrInfo.h | 15 + lib/Target/PowerPC/PPCInstrInfo.td | 1297 +++++++++++++++----- lib/Target/PowerPC/PPCInstrVSX.td | 816 ++++++++++++ lib/Target/PowerPC/PPCJITInfo.cpp | 5 + lib/Target/PowerPC/PPCMCInstLower.cpp | 56 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 156 ++- lib/Target/PowerPC/PPCRegisterInfo.h | 13 +- lib/Target/PowerPC/PPCRegisterInfo.td | 86 +- lib/Target/PowerPC/PPCSchedule.td | 981 ++++++++------- lib/Target/PowerPC/PPCSchedule440.td | 1118 ++++++++--------- lib/Target/PowerPC/PPCScheduleA2.td | 246 ++-- lib/Target/PowerPC/PPCScheduleE500mc.td | 493 ++++---- lib/Target/PowerPC/PPCScheduleE5500.td | 592 +++++---- lib/Target/PowerPC/PPCScheduleG3.td | 119 +- lib/Target/PowerPC/PPCScheduleG4.td | 147 ++- lib/Target/PowerPC/PPCScheduleG4Plus.td | 168 +-- lib/Target/PowerPC/PPCScheduleG5.td | 180 +-- lib/Target/PowerPC/PPCScheduleP7.td | 385 ++++++ lib/Target/PowerPC/PPCSubtarget.cpp | 23 +- lib/Target/PowerPC/PPCSubtarget.h | 27 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 64 +- lib/Target/PowerPC/PPCTargetObjectFile.cpp | 30 +- lib/Target/PowerPC/PPCTargetObjectFile.h | 10 +- lib/Target/PowerPC/PPCTargetStreamer.h | 2 + lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 56 +- lib/Target/PowerPC/TargetInfo/CMakeLists.txt | 4 - lib/Target/PowerPC/TargetInfo/LLVMBuild.txt | 2 +- 69 files changed, 9136 insertions(+), 3343 deletions(-) create mode 100644 lib/Target/PowerPC/Disassembler/CMakeLists.txt create mode 100644 lib/Target/PowerPC/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/PowerPC/Disassembler/Makefile create mode 100644 
lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp create mode 100644 lib/Target/PowerPC/PPCInstrVSX.td create mode 100644 lib/Target/PowerPC/PPCScheduleP7.td (limited to 'lib/Target/PowerPC') diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt index 3aa59c0..408858e 100644 --- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt +++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt @@ -1,8 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMPowerPCAsmParser PPCAsmParser.cpp ) - -add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index fe83fe1..8bb91cf 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -9,21 +9,22 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCMCExpr.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/MC/MCStreamer.h" +#include "PPCTargetStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -94,6 
+95,44 @@ static unsigned VRegs[32] = { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; +static unsigned VSRegs[64] = { + PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, + PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, + PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, + PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, + PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, + PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23, + PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, + PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, + + PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, + PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, + PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, + PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, + PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, + PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, + PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, + PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 +}; +static unsigned VSFRegs[64] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static unsigned CRBITRegs[32] = { PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, @@ -177,6 +216,7 @@ class PPCAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; const MCInstrInfo &MII; bool IsPPC64; + bool IsDarwin; MCAsmParser &getParser() const { return Parser; } MCAsmLexer 
&getLexer() const { return Parser.getLexer(); } @@ -185,6 +225,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool isPPC64() const { return IsPPC64; } + bool isDarwin() const { return IsDarwin; } bool MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal); @@ -195,12 +236,14 @@ class PPCAsmParser : public MCTargetAsmParser { PPCMCExpr::VariantKind &Variant); const MCExpr *FixupVariantKind(const MCExpr *E); bool ParseExpression(const MCExpr *&EVal); + bool ParseDarwinExpression(const MCExpr *&EVal); bool ParseOperand(SmallVectorImpl &Operands); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveTC(unsigned Size, SMLoc L); bool ParseDirectiveMachine(SMLoc L); + bool ParseDarwinDirectiveMachine(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, @@ -227,6 +270,7 @@ public: Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); + IsDarwin = TheTriple.isMacOSX(); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } @@ -339,6 +383,11 @@ public: return (unsigned) Imm.Val; } + unsigned getVSReg() const { + assert(isVSRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + unsigned getCCReg() const { assert(isCCRegNumber() && "Invalid access!"); return (unsigned) (Kind == Immediate ? 
Imm.Val : Expr.CRVal); @@ -356,6 +405,7 @@ public: bool isToken() const { return Kind == Token; } bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } @@ -376,6 +426,7 @@ public: (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); } bool isCCRegNumber() const { return (Kind == Expression && isUInt<3>(getExprCRVal())) || (Kind == Immediate @@ -442,6 +493,16 @@ public: Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); } + void addRegVSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()])); + } + + void addRegVSFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); @@ -867,7 +928,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Post-process instructions (typically extended mnemonics) ProcessInstruction(Inst, Operands); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + Out.EmitInstruction(Inst, STI); return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); @@ -1081,10 +1142,16 @@ FixupVariantKind(const MCExpr *E) { llvm_unreachable("Invalid expression kind!"); } -/// Parse an expression. 
This differs from the default "parseExpression" -/// in that it handles complex \code @l/@ha \endcode modifiers. +/// ParseExpression. This differs from the default "parseExpression" in that +/// it handles modifiers. bool PPCAsmParser:: ParseExpression(const MCExpr *&EVal) { + + if (isDarwin()) + return ParseDarwinExpression(EVal); + + // (ELF Platforms) + // Handle \code @l/@ha \endcode if (getParser().parseExpression(EVal)) return true; @@ -1098,6 +1165,55 @@ ParseExpression(const MCExpr *&EVal) { return false; } +/// ParseDarwinExpression. (MachO Platforms) +/// This differs from the default "parseExpression" in that it handles detection +/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present, +/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO +/// syntax form so it is done here. TODO: Determine if there is merit in arranging +/// for this to be done at a higher level. +bool PPCAsmParser:: +ParseDarwinExpression(const MCExpr *&EVal) { + PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None; + switch (getLexer().getKind()) { + default: + break; + case AsmToken::Identifier: + // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus + // something starting with any other char should be part of the + // asm syntax. If handwritten asm includes an identifier like lo16, + // then all bets are off - but no-one would do that, right? 
+ StringRef poss = Parser.getTok().getString(); + if (poss.equals_lower("lo16")) { + Variant = PPCMCExpr::VK_PPC_LO; + } else if (poss.equals_lower("hi16")) { + Variant = PPCMCExpr::VK_PPC_HI; + } else if (poss.equals_lower("ha16")) { + Variant = PPCMCExpr::VK_PPC_HA; + } + if (Variant != PPCMCExpr::VK_PPC_None) { + Parser.Lex(); // Eat the xx16 + if (getLexer().isNot(AsmToken::LParen)) + return Error(Parser.getTok().getLoc(), "expected '('"); + Parser.Lex(); // Eat the '(' + } + break; + } + + if (getParser().parseExpression(EVal)) + return true; + + if (Variant != PPCMCExpr::VK_PPC_None) { + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + Parser.Lex(); // Eat the ')' + EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext()); + } + return false; +} + +/// ParseOperand +/// This handles registers in the form 'NN', '%rNN' for ELF platforms and +/// rNN for MachO. bool PPCAsmParser:: ParseOperand(SmallVectorImpl &Operands) { SMLoc S = Parser.getTok().getLoc(); @@ -1121,14 +1237,31 @@ ParseOperand(SmallVectorImpl &Operands) { } return Error(S, "invalid register name"); + case AsmToken::Identifier: + // Note that non-register-name identifiers from the compiler will begin + // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include + // identifiers like r31foo - so we fall through in the event that parsing + // a register name fails. + if (isDarwin()) { + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + return false; + } + } + // Fall-through to process non-register-name identifiers as expression. 
// All other expressions case AsmToken::LParen: case AsmToken::Plus: case AsmToken::Minus: case AsmToken::Integer: - case AsmToken::Identifier: case AsmToken::Dot: case AsmToken::Dollar: + case AsmToken::Exclaim: + case AsmToken::Tilde: if (!ParseExpression(EVal)) break; /* fall through */ @@ -1177,11 +1310,25 @@ ParseOperand(SmallVectorImpl &Operands) { break; case AsmToken::Integer: - if (getParser().parseAbsoluteExpression(IntVal) || + if (!isDarwin()) { + if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 || IntVal > 31) return Error(S, "invalid register number"); + } else { + return Error(S, "unexpected integer value"); + } break; + case AsmToken::Identifier: + if (isDarwin()) { + unsigned RegNo; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + break; + } + } + // Fall-through.. + default: return Error(S, "invalid memory operand"); } @@ -1261,14 +1408,19 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, /// ParseDirective parses the PPC specific directives bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - if (IDVal == ".llong") - return ParseDirectiveWord(8, DirectiveID.getLoc()); - if (IDVal == ".tc") - return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); - if (IDVal == ".machine") - return ParseDirectiveMachine(DirectiveID.getLoc()); + if (!isDarwin()) { + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + if (IDVal == ".llong") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 
8 : 4, DirectiveID.getLoc()); + if (IDVal == ".machine") + return ParseDirectiveMachine(DirectiveID.getLoc()); + } else { + if (IDVal == ".machine") + return ParseDarwinDirectiveMachine(DirectiveID.getLoc()); + } return true; } @@ -1279,7 +1431,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { for (;;) { const MCExpr *Value; if (getParser().parseExpression(Value)) - return true; + return false; getParser().getStreamer().EmitValue(Value, Size); @@ -1303,8 +1455,10 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { while (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().isNot(AsmToken::Comma)) Parser.Lex(); - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::Comma)) { + Error(L, "unexpected token in directive"); + return false; + } Parser.Lex(); // Align to word size. @@ -1314,12 +1468,14 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { return ParseDirectiveWord(Size, L); } -/// ParseDirectiveMachine +/// ParseDirectiveMachine (ELF platforms) /// ::= .machine [ cpu | "push" | "pop" ] bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { if (getLexer().isNot(AsmToken::Identifier) && - getLexer().isNot(AsmToken::String)) - return Error(L, "unexpected token in directive"); + getLexer().isNot(AsmToken::String)) { + Error(L, "unexpected token in directive"); + return false; + } StringRef CPU = Parser.getTok().getIdentifier(); Parser.Lex(); @@ -1329,11 +1485,56 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { // Implement ".machine any" (by doing nothing) for the benefit // of existing assembler code. Likewise, we can then implement // ".machine push" and ".machine pop" as no-op. 
- if (CPU != "any" && CPU != "push" && CPU != "pop") - return Error(L, "unrecognized machine type"); + if (CPU != "any" && CPU != "push" && CPU != "pop") { + Error(L, "unrecognized machine type"); + return false; + } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + PPCTargetStreamer &TStreamer = + *static_cast( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitMachine(CPU); + + return false; +} + +/// ParseDarwinDirectiveMachine (Mach-o platforms) +/// ::= .machine cpu-identifier +bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier) && + getLexer().isNot(AsmToken::String)) { + Error(L, "unexpected token in directive"); + return false; + } + + StringRef CPU = Parser.getTok().getIdentifier(); + Parser.Lex(); + + // FIXME: this is only the 'default' set of cpu variants. + // However we don't act on this information at present, this is simply + // allowing parsing to proceed with minimal sanity checking. 
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") { + Error(L, "unrecognized cpu type"); + return false; + } + + if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) { + Error(L, "wrong cpu type specified for 64bit"); + return false; + } + if (!isPPC64() && CPU == "ppc64") { + Error(L, "wrong cpu type specified for 32bit"); + return false; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } return false; } diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 9a763f5..ea4de63 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) @@ -34,9 +35,8 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -add_dependencies(LLVMPowerPCCodeGen PowerPCCommonTableGen intrinsics_gen) - add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..ca457df --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMPowerPCDisassembler + PPCDisassembler.cpp + ) diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..7f29040 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- 
./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCDisassembler +parent = PowerPC +required_libraries = MC Support PowerPCDesc PowerPCInfo +add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/Disassembler/Makefile b/lib/Target/PowerPC/Disassembler/Makefile new file mode 100644 index 0000000..86e3b47 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCDisassembler + +# Hack: we need to include 'main' PPC target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp new file mode 100644 index 0000000..c4a7544 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -0,0 +1,345 @@ +//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class PPCDisassembler : public MCDisassembler { +public: + PPCDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~PPCDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const override; +}; +} // end anonymous namespace + +static MCDisassembler *createPPCDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new PPCDisassembler(STI); +} + +extern "C" void LLVMInitializePowerPCDisassembler() { + // Register the disassembler for each target. + TargetRegistry::RegisterMCDisassembler(ThePPC32Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget, + createPPCDisassembler); +} + +// FIXME: These can be generated by TableGen from the existing register +// encoding values! 
+ +static const unsigned CRRegs[] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +static const unsigned CRBITRegs[] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; + +static const unsigned FRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; + +static const unsigned VRegs[] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +static const unsigned VSRegs[] = { + PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, + PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, + PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, + PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, + PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, + PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23, + PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, + PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, + + PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, + PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, + PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, + PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, + PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, + PPC::VSH20, PPC::VSH21, 
PPC::VSH22, PPC::VSH23, + PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, + PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 +}; + +static const unsigned VSFRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + +static const unsigned GPRegs[] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned GP0Regs[] = { + PPC::ZERO, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned G8Regs[] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; + +template +static DecodeStatus 
decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned (&Regs)[N]) { + assert(RegNo < N && "Invalid register number"); + Inst.addOperand(MCOperand::CreateReg(Regs[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + +static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRBITRegs); +} + +static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VRegs); +} + +static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSRegs); +} + +static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSFRegs); +} + +static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GPRegs); +} + +static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GP0Regs); +} + +static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, G8Regs); +} + 
+#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass +#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass + +template +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memri field (imm, reg), which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + + uint64_t Base = Imm >> 16; + uint64_t Disp = Imm & 0xFFFF; + + assert(Base < 32 && "Invalid base register"); + + switch (Inst.getOpcode()) { + default: break; + case PPC::LBZU: + case PPC::LHAU: + case PPC::LHZU: + case PPC::LWZU: + case PPC::LFSU: + case PPC::LFDU: + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + break; + case PPC::STBU: + case PPC::STHU: + case PPC::STWU: + case PPC::STFSU: + case PPC::STFDU: + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + break; + } + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memrix field (imm, reg), which has the low 14-bits as the + // displacement and the next 5 bits as the register #. 
+ + uint64_t Base = Imm >> 14; + uint64_t Disp = Imm & 0x3FFF; + + assert(Base < 32 && "Invalid base register"); + + if (Inst.getOpcode() == PPC::LDU) + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + else if (Inst.getOpcode() == PPC::STDU) + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // The cr bit encoding is 0x80 >> cr_reg_num. + + unsigned Zeros = countTrailingZeros(Imm); + assert(Zeros < 8 && "Invalid CR bit value"); + + Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros])); + return MCDisassembler::Success; +} + +#include "PPCGenDisassemblerTables.inc" + +DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the four bytes of the instruction. + uint8_t Bytes[4]; + Size = 4; + if (Region.readBytes(Address, Size, Bytes) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // The instruction is big-endian encoded. + uint32_t Inst = (Bytes[0] << 24) | + (Bytes[1] << 16) | + (Bytes[2] << 8) | + (Bytes[3] << 0); + + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); +} + diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt index a605cc4..ab30a11 100644 --- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMPowerPCAsmPrinter PPCInstPrinter.cpp ) - -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 8281b5c..dc54b52 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -149,6 +149,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU: O << "nu"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } @@ -184,6 +187,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU_PLUS: O << "+"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } @@ -193,6 +199,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 3 && "Invalid u2imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); @@ -310,7 +323,10 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': - case 'v': return RegName + 1; + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8a4c03d..4d1df78 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ 
b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -43,7 +43,7 @@ public: void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = 0); - + void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 7b3e843..9d173d6 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -16,18 +16,20 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = PowerPC parent = Target +has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] type = Library name = PowerPCCodeGen parent = PowerPC -required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index 3efa5ec..3cea65e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,3 @@ add_llvm_library(LLVMPowerPCDesc PPCMachObjectWriter.cpp PPCELFObjectWriter.cpp ) - -add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 0d42081..f7309bb 100644 --- 
a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -71,14 +71,16 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { namespace { class PPCAsmBackend : public MCAsmBackend { -const Target &TheTarget; + const Target &TheTarget; + bool IsLittleEndian; public: - PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} + PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T), + IsLittleEndian(isLittle) {} unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = { + const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, @@ -88,17 +90,27 @@ public: { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; + const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { + // name offset bits flags + { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24abs", 2, 24, 0 }, + { "fixup_ppc_brcond14abs", 2, 14, 0 }, + { "fixup_ppc_half16", 0, 16, 0 }, + { "fixup_ppc_half16ds", 2, 14, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } + }; if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; + return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { + uint64_t Value, bool IsPCRel) const { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
@@ -108,8 +120,10 @@ public: // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i); + Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff); + } } bool mayNeedRelaxation(const MCInst &Inst) const { @@ -152,6 +166,10 @@ public: assert(Name == "ppc32" && "Unknown target name!"); return 4; } + + bool isLittleEndian() const { + return IsLittleEndian; + } }; } // end anonymous namespace @@ -160,7 +178,7 @@ public: namespace { class DarwinPPCAsmBackend : public PPCAsmBackend { public: - DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } + DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; @@ -170,26 +188,18 @@ namespace { (is64 ? 
MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), MachO::CPU_SUBTYPE_POWERPC_ALL); } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; - } }; class ELFPPCAsmBackend : public PPCAsmBackend { uint8_t OSABI; public: - ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : - PPCAsmBackend(T), OSABI(OSABI) { } + ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : + PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; - return createPPCELFObjectWriter(OS, is64, OSABI); - } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; + return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } }; @@ -202,5 +212,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, return new DarwinPPCAsmBackend(T); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFPPCAsmBackend(T, OSABI); + bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le; + return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 54de70e..d19f6a0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -27,17 +28,8 @@ namespace { virtual unsigned getRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend) const; - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - 
const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const; - virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const; + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; }; } @@ -49,12 +41,38 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) PPCELFObjectWriter::~PPCELFObjectWriter() { } +static MCSymbolRefExpr::VariantKind getAccessVariant(const MCFixup &Fixup) { + const MCExpr *Expr = Fixup.getValue(); + + if (Expr->getKind() != MCExpr::Target) + return Fixup.getAccessVariant(); + + switch (cast(Expr)->getKind()) { + case PPCMCExpr::VK_PPC_None: + return MCSymbolRefExpr::VK_None; + case PPCMCExpr::VK_PPC_LO: + return MCSymbolRefExpr::VK_PPC_LO; + case PPCMCExpr::VK_PPC_HI: + return MCSymbolRefExpr::VK_PPC_HI; + case PPCMCExpr::VK_PPC_HA: + return MCSymbolRefExpr::VK_PPC_HA; + case PPCMCExpr::VK_PPC_HIGHERA: + return MCSymbolRefExpr::VK_PPC_HIGHERA; + case PPCMCExpr::VK_PPC_HIGHER: + return MCSymbolRefExpr::VK_PPC_HIGHER; + case PPCMCExpr::VK_PPC_HIGHEST: + return MCSymbolRefExpr::VK_PPC_HIGHEST; + case PPCMCExpr::VK_PPC_HIGHESTA: + return MCSymbolRefExpr::VK_PPC_HIGHESTA; + } + llvm_unreachable("unknown PPCMCExpr kind"); +} + unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Fixup); // determine the type of the relocation unsigned Type; @@ -356,64 +374,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { + bool IsPCRel) const { return getRelocTypeInner(Target, Fixup, IsPCRel); } -const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const { - assert(Target.getSymA() && "SymA cannot be 0"); - MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? - MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); - - bool EmitThisSym; - switch (Modifier) { - // GOT references always need a relocation, even if the - // target symbol is local. - case MCSymbolRefExpr::VK_GOT: - case MCSymbolRefExpr::VK_PPC_GOT_LO: - case MCSymbolRefExpr::VK_PPC_GOT_HI: - case MCSymbolRefExpr::VK_PPC_GOT_HA: - EmitThisSym = true; - break; - default: - EmitThisSym = false; - break; - } - - if (EmitThisSym) - return &Target.getSymA()->getSymbol().AliasedSymbol(); - return NULL; -} - -const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - assert(Target.getSymA() && "SymA cannot be 0"); - const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); - - unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel); - - // The .odp creation emits a relocation against the symbol ".TOC." which - // create a R_PPC64_TOC relocation. However the relocation symbol name - // in final object creation should be NULL, since the symbol does not - // really exist, it is just the reference to TOC base for the current - // object file. 
- bool EmitThisSym = RelocType != ELF::R_PPC64_TOC; - - if (EmitThisSym && !Symbol.isTemporary()) - return &Symbol; - return NULL; -} - MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index f3dddce..18609e1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "PPCMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } -PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } @@ -30,22 +32,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { AssemblerDialect = 1; // New-Style mnemonics. SupportsDebugInformation= true; // Debug information. + + // The installed assembler for OSX < 10.6 lacks some directives. + // FIXME: this should really be a check on the assembler characteristics + // rather than OS version + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) + HasWeakDefCanBeHiddenDirective = false; + + UseIntegratedAssembler = true; } void PPCLinuxMCAsmInfo::anchor() { } -PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { +PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } - IsLittleEndian = false; + IsLittleEndian = T.getArch() == Triple::ppc64le; // ".comm align is in bytes but .align is pow-2." 
AlignmentIsInBytes = false; CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; // Uses '.section' before '.bss' directive UsesELFSectionDirectiveForBSS = true; @@ -65,5 +73,10 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; AssemblerDialect = 1; // New-Style mnemonics. + + if (T.getOS() == llvm::Triple::FreeBSD || + (T.getOS() == llvm::Triple::NetBSD && !is64Bit) || + (T.getOS() == llvm::Triple::OpenBSD && !is64Bit)) + UseIntegratedAssembler = true; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 1530e77..cee2cb7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -18,17 +18,18 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { +class Triple; class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { virtual void anchor(); public: - explicit PPCMCAsmInfoDarwin(bool is64Bit); + explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; class PPCLinuxMCAsmInfo : public MCAsmInfoELF { virtual void anchor(); public: - explicit PPCLinuxMCAsmInfo(bool is64Bit); + explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&); }; } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 346a9be..b259c5d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -33,70 +33,85 @@ class PPCMCCodeEmitter : public MCCodeEmitter { PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; - const MCSubtargetInfo &STI; + const MCInstrInfo &MCII; const MCContext &CTX; - Triple TT; + bool IsLittleEndian; public: - PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : STI(sti), CTX(ctx), 
TT(STI.getTargetTriple()) { + PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle) + : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) { } ~PPCMCCodeEmitter() {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. 
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl &Fixups) const; + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // For fast-isel, a float COPY_TO_REGCLASS can survive this long. // It's just a nop to keep the register classes happy, so don't // generate anything. unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); if (Opcode == TargetOpcode::COPY_TO_REGCLASS) return; - uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - // BL8_NOP etc. all have a size of 8 because of the following 'nop'. - unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! - if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || - Opcode == PPC::BL8_NOP_TLS) - Size = 8; - - // Output the constant in big endian byte order. - int ShiftValue = (Size * 8) - 8; - for (unsigned i = 0; i != Size; ++i) { - OS << (char)(Bits >> ShiftValue); - Bits <<= 8; + // Output the constant in big/little endian byte order. + unsigned Size = Desc.getSize(); + if (IsLittleEndian) { + for (unsigned i = 0; i != Size; ++i) { + OS << (char)Bits; + Bits >>= 8; + } + } else { + int ShiftValue = (Size * 8) - 8; + for (unsigned i = 0; i != Size; ++i) { + OS << (char)(Bits >> ShiftValue); + Bits <<= 8; + } } ++MCNumEmitted; // Keep track of the # of mi's emitted. 
@@ -110,14 +125,17 @@ MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new PPCMCCodeEmitter(MCII, STI, Ctx); + Triple TT(STI.getTargetTriple()); + bool IsLittleEndian = TT.getArch() == Triple::ppc64le; + return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian); } unsigned PPCMCCodeEmitter:: getDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -126,9 +144,10 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, } unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -138,9 +157,10 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. 
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -150,9 +170,10 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -161,79 +182,87 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, } unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the immediate field. - Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); return 0; } unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // Encode (imm, reg) as a memri, which has the low 16-bits as the // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16; + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16; const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; + return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. 
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); return RegBits; } unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // Encode (imm, reg) as a memrix, which has the low 14-bits as the // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14; + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14; const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits; + return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16ds)); return RegBits; } unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the TLS register, which simply provides a relocation // hint to the linker that this statement is part of a relocation sequence. // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); - return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); + Triple TT(STI.getTargetTriple()); + bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le; + return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? 
PPC::X13 : PPC::R2); } unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { // For special TLS calls, we need two fixups; one for the branch target // (__tls_get_addr), which we create via getDirectBrEncoding as usual, // and one for the TLSGD or TLSLD symbol, which is emitted here. const MCOperand &MO = MI.getOperand(OpNo+1); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); - return getDirectBrEncoding(MI, OpNo, Fixups); + return getDirectBrEncoding(MI, OpNo, Fixups, STI); } unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && @@ -244,7 +273,8 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups) const { + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { if (MO.isReg()) { // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index d7e8402..c181e03 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -9,9 +9,9 @@ #define DEBUG_TYPE "ppcmcexpr" #include "PPCMCExpr.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" using namespace llvm; @@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { MCValue Value; - if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout)) return false; if (Value.isAbsolute()) { @@ -86,6 +86,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, } Res = MCValue::get(Result); } else { + if (!Layout) + return false; + MCContext &Context = Layout->getAssembler().getContext(); const MCSymbolRefExpr *Sym = Value.getSymA(); MCSymbolRefExpr::VariantKind Modifier = Sym->getKind(); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index e44c7c1..5fc7918 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -10,9 +10,9 @@ #ifndef PPCMCEXPR_H #define PPCMCEXPR_H +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f18d095..105c511 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -39,6 +39,7 @@ using namespace llvm; // Pin the vtable to this file. 
PPCTargetStreamer::~PPCTargetStreamer() {} +PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} static MCInstrInfo *createPPCMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); @@ -72,9 +73,9 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) - MAI = new PPCMCAsmInfoDarwin(isPPC64); + MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else - MAI = new PPCLinuxMCAsmInfo(isPPC64); + MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple); // Initial state of the frame pointer is R1. unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; @@ -112,7 +113,8 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer { formatted_raw_ostream &OS; public: - PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {} + PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) + : PPCTargetStreamer(S), OS(OS) {} virtual void emitTCEntry(const MCSymbol &S) { OS << "\t.tc "; OS << S.getName(); @@ -120,12 +122,33 @@ public: OS << S.getName(); OS << '\n'; } + virtual void emitMachine(StringRef CPU) { + OS << "\t.machine " << CPU << '\n'; + } }; class PPCTargetELFStreamer : public PPCTargetStreamer { +public: + PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} virtual void emitTCEntry(const MCSymbol &S) { // Creates a R_PPC64_TOC relocation - Streamer->EmitSymbolValue(&S, 8); + Streamer.EmitSymbolValue(&S, 8); + } + virtual void emitMachine(StringRef CPU) { + // FIXME: Is there anything to do in here or does this directive only + // limit the parser? + } +}; + +class PPCTargetMachOStreamer : public PPCTargetStreamer { +public: + PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + virtual void emitTCEntry(const MCSymbol &S) { + llvm_unreachable("Unknown pseudo-op: .tc"); + } + virtual void emitMachine(StringRef CPU) { + // FIXME: We should update the CPUType, CPUSubType in the Object file if + // the new values are different from the defaults. 
} }; } @@ -135,25 +158,32 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, bool NoExecStack) { - if (Triple(TT).isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + if (Triple(TT).isOSDarwin()) { + MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + new PPCTargetMachOStreamer(*S); + return S; + } - PPCTargetStreamer *S = new PPCTargetELFStreamer(); - return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack); + MCStreamer *S = + createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + new PPCTargetELFStreamer(*S); + return S; } static MCStreamer * createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, bool useCFI, - bool useDwarfDirectory, MCInstPrinter *InstPrint, - MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS); - - return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, - useDwarfDirectory, InstPrint, CE, TAB, - ShowInst); + bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + MCInstPrinter *InstPrint, MCCodeEmitter *CE, + MCAsmBackend *TAB, bool ShowInst) { + + MCStreamer *S = + llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, + InstPrint, CE, TAB, ShowInst); + new PPCTargetAsmStreamer(*S, OS); + return S; } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 0b0ca24..474395b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -46,6 +46,7 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, /// createPPCELFObjectWriter - Construct an PPC ELF object writer. 
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI); /// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 63facc5..c2987b6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -42,6 +42,10 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS; case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS; case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS; + + // Simple predicates for single condition-register bits. + case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET; + case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET; } llvm_unreachable("Unknown PPC branch opcode!"); } @@ -72,6 +76,10 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS; case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS; case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS; + + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index d498c2f..10e328a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -48,7 +48,12 @@ namespace PPC { PRED_GT_PLUS = (1 << 5) | 15, PRED_NE_PLUS = (2 << 5) | 7, PRED_UN_PLUS = (3 << 5) | 15, - PRED_NU_PLUS = (3 << 5) | 7 + PRED_NU_PLUS = (3 << 5) | 7, + + // When dealing with individual condition-register bits, we have simple set + // and unset predicates. + PRED_BIT_SET = 1024, + PRED_BIT_UNSET = 1025 }; /// Invert the specified predicate. 
!= -> ==, < -> >=. diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 21fdcd9..c966748 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ - PPCGenMCCodeEmitter.inc PPCGenFastISel.inc + PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ + PPCGenDisassemblerTables.inc -DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc +DIRS = AsmParser Disassembler InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index f0d5af2..c42c5be 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -23,6 +23,7 @@ namespace llvm { class PPCTargetMachine; + class PassRegistry; class FunctionPass; class ImmutablePass; class JITCodeEmitter; @@ -35,6 +36,9 @@ namespace llvm { FunctionPass *createPPCCTRLoopsVerify(); #endif FunctionPass *createPPCEarlyReturnPass(); + FunctionPass *createPPCVSXCopyPass(); + FunctionPass *createPPCVSXCopyCleanupPass(); + FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, @@ -45,6 +49,9 @@ namespace llvm { /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); + void initializePPCVSXFMAMutatePass(PassRegistry&); + extern char &PPCVSXFMAMutateID; + namespace PPCII { /// Target Operand Flag enum. 
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 54e3d40..bd58539 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -51,6 +51,8 @@ def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; +def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", + "Use condition-register bits individually">; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions">; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", @@ -88,7 +90,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", - "Enable VSX instructions">; + "Enable VSX instructions", + [FeatureAltivec]>; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -110,6 +113,12 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", // their record-form variants. class RecFormRel; +// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX +// FMA instruction forms with their corresponding factor-killing forms. +class AltVSXFMARel { + bit IsVSXFMAAlt = 0; +} + //===----------------------------------------------------------------------===// // Relation Map Definitions. //===----------------------------------------------------------------------===// @@ -140,6 +149,19 @@ def getNonRecordFormOpcode : InstrMapping { let ValueCols = [["0"]]; } +def getAltVSXFMAOpcode : InstrMapping { + let FilterClass = "AltVSXFMARel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName"]; + // Instructions with the same RC value form a column. 
+ let ColFields = ["IsVSXFMAAlt"]; + // The key column are the (default) addend-killing instructions. + let KeyCol = ["0"]; + // Value columns IsVSXFMAAlt=1 + let ValueCols = [["1"]]; +} + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -153,12 +175,12 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, @@ -254,7 +276,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", G5Model, +def : ProcessorModel<"pwr7", P7Model, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, @@ -283,11 +305,11 @@ include "PPCCallingConv.td" def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; -} -def PPCAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; + // FIXME: Unset this when no longer needed! 
+ let decodePositionallyEncodedOperands = 1; + + let noNamedPositionallyEncodedOperands = 1; } def PPCAsmParser : AsmParser { @@ -306,8 +328,7 @@ def PPCAsmParserVariant : AsmParserVariant { def PPC : Target { // Information about the instructions. let InstructionSet = PPCInstrInfo; - - let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index ada34ed..9ce8ea9 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -19,24 +19,24 @@ #define DEBUG_TYPE "asmprinter" #include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" -#include "MCTargetDesc/PPCPredicates.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -54,7 +54,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -130,7 +129,10 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': 
case 'f': - case 'v': return RegName + 1; + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } @@ -139,6 +141,7 @@ static const char *stripRegisterPrefix(const char *RegName) { void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -157,37 +160,13 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - // FIXME: PIC relocation model - return; case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: O << *GetBlockAddressSymbol(MO.getBlockAddress()); return; - case MachineOperand::MO_ExternalSymbol: { - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() == Reloc::Static) { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - } - - MCSymbol *NLPSym = - OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ - MO.getSymbolName()+"$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getGVStubEntry(NLPSym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); - - O << *NLPSym; - return; - } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. 
const GlobalValue *GV = MO.getGlobal(); @@ -197,7 +176,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (TM.getRelocationModel() != Reloc::Static && (GV->isDeclaration() || GV->isWeakForLinker())) { if (!GV->hasHiddenVisibility()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo() .getGVStubEntry(SymToPrint); @@ -206,7 +185,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo(). @@ -305,12 +284,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - + const DataLayout *DL = TM.getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; // To avoid name clash check if the name already exists. while (TOCEntry == 0) { - if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + "C" + Twine(TOCLabelID++)) == 0) { TOCEntry = GetTempSymbol("C", TOCLabelID); } @@ -325,6 +304,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { /// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; + bool isPPC64 = Subtarget.isPPC64(); // Lower multi-instruction pseudo operations. 
switch (MI->getOpcode()) { @@ -340,7 +320,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); @@ -376,7 +356,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } @@ -401,8 +381,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalValue *RealGValue = + GAlias ? GAlias->getAliasedGlobal() : GValue; MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); @@ -422,7 +402,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::LDtocL: { @@ -448,8 +428,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalValue *RealGValue = + GAlias ? 
GAlias->getAliasedGlobal() : GValue; MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast(RealGValue); @@ -463,7 +443,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::ADDItocL: { @@ -483,8 +463,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalValue *RealGValue = + GAlias ? GAlias->getAliasedGlobal() : GValue; MOSymbol = getSymbol(RealGValue); const GlobalVariable *GVar = dyn_cast(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); @@ -499,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::ADDISgotTprelHA: { @@ -512,18 +492,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTprel)); return; } - case PPC::LDgotTprelL: { + case PPC::LDgotTprelL: + case PPC::LDgotTprelL32: { // Transform %Xd = LDgotTprelL , %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD. - TmpInst.setOpcode(PPC::LD); + TmpInst.setOpcode(isPPC64 ? 
PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -531,7 +512,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); + return; + } + + case PPC::PPC32GOT: { + MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *SymGotTlsL = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, + OutContext); + const MCExpr *SymGotTlsHA = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, + OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsL)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsHA)); return; } case PPC::ADDIStlsgdHA: { @@ -544,7 +543,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTlsGD)); @@ -560,7 +559,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -581,7 +580,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr 
*SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -596,7 +595,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTlsLD)); @@ -612,7 +611,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -633,7 +632,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -648,7 +647,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X3) .addExpr(SymDtprel)); @@ -664,7 +663,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); - 
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -679,7 +678,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8; OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode) .addReg(MI->getOperand(0).getReg())); return; } @@ -695,19 +694,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { ->getEncodingValue(MI->getOperand(0).getReg()); OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode) .addImm(Mask) .addReg(MI->getOperand(1).getReg())); return; } break; - case PPC::SYNC: - // In Book E sync is called msync, handle this special case here... 
- if (Subtarget.isBookE()) { - OutStreamer.EmitRawText(StringRef("\tmsync")); - return; - } - break; case PPC::LD: case PPC::STD: case PPC::LWA_32: @@ -730,7 +722,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); } void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { @@ -773,7 +765,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TD->getPointerSizeInBits() == 64; PPCTargetStreamer &TS = - static_cast(OutStreamer.getTargetStreamer()); + static_cast(*OutStreamer.getTargetStreamer()); if (isPPC64 && !TOC.empty()) { const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", @@ -861,13 +853,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget.isPPC64() && Directive < PPC::DIR_64) Directive = PPC::DIR_64; assert(Directive <= PPC::DIR_64 && "Directive out of range."); - - // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) { - assert(Directive < array_lengthof(CPUDirectives) && - "CPUDirectives[] might not be up-to-date!"); - OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); - } + + assert(Directive < array_lengthof(CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); + PPCTargetStreamer &TStreamer = + *static_cast(OutStreamer.getTargetStreamer()); + TStreamer.emitMachine(CPUDirectives[Directive]); // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
@@ -877,14 +868,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer.SwitchSection( OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { OutStreamer.SwitchSection( OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText())); } OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); @@ -916,8 +907,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { if (TM.getRelocationModel() == Reloc::PIC_) { const MCSection *StubSection = OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.SwitchSection(StubSection); @@ -937,32 +928,32 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext); // mflr r0 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); // bcl 20, 31, AnonSymbol - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon)); OutStreamer.EmitLabel(AnonSymbol); // mflr r11 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) const MCExpr *SubHa16 = 
PPCMCExpr::CreateHa(Sub, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) .addExpr(SubHa16)); // mtlr r0 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) .addReg(PPC::R11)); // mtctr r12 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); // bctr - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR)); OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); @@ -984,8 +975,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { const MCSection *StubSection = OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { MCSymbol *Stub = Stubs[i].first; @@ -1001,7 +992,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // lis r11, ha16(LazyPtr) const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); @@ -1009,15 +1000,15 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // 
lwzu r12, lo16(LazyPtr)(r11) const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) .addReg(PPC::R11)); // mtctr r12 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); // bctr - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR)); OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); @@ -1058,7 +1049,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { for (std::vector::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { - MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMIMacho.getGVStubEntry(NLPSym); StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); @@ -1147,4 +1138,5 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, extern "C" void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 3e608ca..9276211 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -115,6 +115,9 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock *Dest = 0; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); + else if 
((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && + !I->getOperand(1).isImm()) + Dest = I->getOperand(1).getMBB(); else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && !I->getOperand(0).isImm()) @@ -166,6 +169,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); + } else if (I->getOpcode() == PPC::BC) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); + } else if (I->getOpcode() == PPC::BCn) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); } else if (I->getOpcode() == PPC::BDNZ8) { diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4224ae2..9c5db50 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -26,28 +26,28 @@ #define DEBUG_TYPE "ctrloops" #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" +#include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include 
"llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "PPCTargetMachine.h" -#include "PPC.h" #ifndef NDEBUG #include "llvm/CodeGen/MachineDominators.h" @@ -96,8 +96,8 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); } @@ -109,7 +109,7 @@ namespace { PPCTargetMachine *TM; LoopInfo *LI; ScalarEvolution *SE; - DataLayout *TD; + const DataLayout *DL; DominatorTree *DT; const TargetLibraryInfo *LibInfo; }; @@ -145,7 +145,7 @@ namespace { INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", @@ -170,8 +170,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis(); SE = &getAnalysis(); - DT = &getAnalysis(); - TD = getAnalysisIfAvailable(); + DT = &getAnalysis().getDomTree(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : 0; LibInfo = getAnalysisIfAvailable(); bool MadeChange = false; @@ -186,6 +187,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) { return MadeChange; } +static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { + if (IntegerType *ITy = dyn_cast(Ty)) + return ITy->getBitWidth() > (Is32Bit ? 
32U : 64U); + + return false; +} + bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { @@ -352,13 +360,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { CastInst *CI = cast(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - (TT.isArch32Bit() && - (CI->getSrcTy()->getScalarType()->isIntegerTy(64) || - CI->getDestTy()->getScalarType()->isIntegerTy(64)) - )) + isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) return true; - } else if (TT.isArch32Bit() && - J->getType()->getScalarType()->isIntegerTy(64) && + } else if (isLargeIntegerTy(TT.isArch32Bit(), + J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index e8e7f4c..d48164d 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -15,6 +15,8 @@ /// CCIfSubtarget - Match if the current subtarget has a feature F. 
class CCIfSubtarget : CCIf().", F), A>; +class CCIfNotSubtarget + : CCIf().", F), A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention @@ -23,7 +25,8 @@ class CCIfSubtarget // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ // On PPC64, integer return values are always promoted to i64 - CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType>>, + CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType>>, + CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType>>, CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, @@ -33,7 +36,8 @@ def RetCC_PPC : CallingConv<[ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, // Vector types are always returned in V2. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>, + CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>> ]>; @@ -46,6 +50,7 @@ def RetCC_PPC : CallingConv<[ // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType>, CCIfType<[i8], CCPromoteToType>, CCIfType<[i16], CCPromoteToType>, CCIfType<[i32], CCPromoteToType>, @@ -58,6 +63,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. 
def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType>, CCIfType<[i8], CCPromoteToType>, CCIfType<[i16], CCPromoteToType>, CCIfType<[i32], CCPromoteToType>, @@ -65,7 +71,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[f32], CCAssignToReg<[F1, F2]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>, + CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>> ]>; //===----------------------------------------------------------------------===// @@ -73,6 +80,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_PPC32_SVR4_Common : CallingConv<[ + CCIfType<[i1], CCPromoteToType>, + // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit>>, @@ -97,7 +106,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ CCIfType<[f32,f64], CCAssignToStack<8, 8>>, // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>> + CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>> ]>; // This calling convention puts vector arguments always on the stack. It is used @@ -113,6 +122,9 @@ def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. 
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, + VSH10, VSH11, VSH12, VSH13]>>, CCDelegateTo ]>; diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 418736e..84fc888 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -121,7 +121,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::CFI_INSTRUCTION: + break; case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 09117e7..dd45683 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -15,10 +15,10 @@ #define DEBUG_TYPE "ppcfastisel" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCISelLowering.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" @@ -28,12 +28,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -80,7 +80,7 @@ typedef struct Address { } } Address; -class PPCFastISel : public FastISel { +class PPCFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; @@ 
-127,7 +127,6 @@ class PPCFastISel : public FastISel { bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); bool SelectIToFP(const Instruction *I, bool IsSigned); @@ -325,11 +324,11 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { II != IE; ++II, ++GTI) { const Value *Op = *II; if (StructType *STy = dyn_cast(*GTI)) { - const StructLayout *SL = TD.getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. @@ -407,7 +406,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, // register and continue. This should almost never happen. if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); Addr.Base.Reg = ResultReg; Addr.BaseType = Address::RegBase; @@ -499,13 +498,13 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); // Base reg with offset in range. 
} else if (UseOffset) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. @@ -529,7 +528,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LFS: Opc = PPC::LFSX; break; case PPC::LFD: Opc = PPC::LFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -615,12 +614,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg) - .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg) + .addImm(Addr.Offset) + .addFrameIndex(Addr.Base.FI) + .addMemOperand(MMO); // Base reg with offset in range. } else if (UseOffset) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. 
@@ -640,7 +642,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STFS: Opc = PPC::STFSX; break; case PPC::STFD: Opc = PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -704,9 +706,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { CondReg)) return false; - BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC)) + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -714,7 +716,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DL); + FastEmitBranch(Target, DbgLoc); return true; } @@ -737,6 +739,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); + if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits()) + return false; + // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. 
We are @@ -811,10 +816,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, } if (!UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addReg(SrcReg2); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addImm(Imm); return true; @@ -853,7 +858,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { // Round the result to single precision. unsigned DestReg = createResultReg(&PPC::F4RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) .addReg(SrcReg); UpdateValueMap(I, DestReg); @@ -892,11 +897,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, unsigned LoadOpc = PPC::LFD; if (SrcVT == MVT::i32) { - Addr.Offset = 4; - if (!IsSigned) + if (!IsSigned) { LoadOpc = PPC::LFIWZX; - else if (PPCSubTarget.hasLFIWAX()) + Addr.Offset = 4; + } else if (PPCSubTarget.hasLFIWAX()) { LoadOpc = PPC::LFIWAX; + Addr.Offset = 4; + } } const TargetRegisterClass *RC = &PPC::F8RCRegClass; @@ -970,7 +977,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; // Generate the convert. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(FPReg); UpdateValueMap(I, DestReg); @@ -1042,7 +1049,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); if (InRC == &PPC::F4RCRegClass) { unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) .addReg(SrcReg).addImm(PPC::F8RCRegClassID); SrcReg = TmpReg; @@ -1062,7 +1069,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; // Generate the convert. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Now move the integer value from a float register to an integer register. 
@@ -1155,8 +1162,10 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } if (UseImm) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) - .addReg(SrcReg1).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg) + .addReg(SrcReg1) + .addImm(Imm); UpdateValueMap(I, ResultReg); return true; } @@ -1171,7 +1180,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { if (ISDOpcode == ISD::SUB) std::swap(SrcReg1, SrcReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); UpdateValueMap(I, ResultReg); return true; @@ -1198,7 +1207,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // Skip vector arguments for now, as well as long double and // uint128_t, and anything that isn't passed in a register. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || !VA.isRegLoc() || VA.needsCustom()) return false; @@ -1211,7 +1220,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) .addImm(NumBytes); @@ -1270,9 +1279,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, ++NextGPR; } else ArgReg = NextGPR++; - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ArgReg).addReg(Arg); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); RegArgs.push_back(ArgReg); } @@ -1285,7 +1294,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg) { // Issue CallSEQ_END. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) .addImm(NumBytes).addImm(0); @@ -1315,14 +1324,14 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); ResultReg = createResultReg(CpyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(SourcePhysReg); // If necessary, round the floating result to single precision. } else if (CopyVT == MVT::f64) { ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), ResultReg).addReg(SourcePhysReg); // If only the low half of a general register is needed, generate @@ -1333,7 +1342,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl &UsedRegs, ResultReg = createResultReg(&PPC::GPRCRegClass); // Convert physical register from G8RC to GPRC. SourcePhysReg -= PPC::X0 - PPC::R0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(SourcePhysReg); } @@ -1440,7 +1449,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (Arg == 0) return false; - unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); Args.push_back(*II); @@ -1465,7 +1474,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Build direct call with NOP for TOC restore. // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BL8_NOP)); // Add callee. 
MIB.addGlobalAddress(GV); @@ -1522,8 +1531,8 @@ bool PPCFastISel::SelectRet(const Instruction *I) { const Constant *C = cast(RV); unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); unsigned RetReg = ValLocs[0].getLocReg(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - RetReg).addReg(SrcReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); RetRegs.push_back(RetReg); } else { @@ -1578,14 +1587,14 @@ bool PPCFastISel::SelectRet(const Instruction *I) { } } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetRegs[i]) .addReg(SrcReg); } } } - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BLR)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) @@ -1615,7 +1624,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); Opc = PPC::EXTSW_32_64; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Unsigned 32-bit extensions use RLWINM. 
@@ -1627,7 +1636,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); MB = 16; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); @@ -1640,7 +1649,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, MB = 48; else MB = 32; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICL_32_64), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); } @@ -1654,9 +1663,9 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) .addReg(AddrReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) @@ -1684,7 +1693,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) { // The only interesting case is when we need to switch register classes. if (SrcVT == MVT::i64) { unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg, 0, PPC::sub_32); SrcReg = ResultReg; } @@ -1791,7 +1801,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { return 0; // All FP constants are loaded from the constant pool. 
- unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); assert(Align > 0 && "Unexpectedly missing alignment information!"); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -1807,25 +1817,25 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), TmpReg) .addConstantPoolIndex(Idx).addReg(PPC::X2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); // But for large code model, we must generate a LDtocL followed // by the LF[SD]. 
if (CModel == CodeModel::Large) { unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg2); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) .addReg(TmpReg) .addMemOperand(MMO); @@ -1853,7 +1863,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { if (!GVar) { // If GV is an alias, use the aliasee for determining thread-locality. if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); + GVar = dyn_cast_or_null(GA->getAliasedGlobal()); } // FIXME: We don't yet handle the complexity of TLS. @@ -1863,8 +1873,10 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // For small code model, generate a simple TOC load. 
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg) - .addGlobalAddress(GV).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), + DestReg) + .addGlobalAddress(GV) + .addReg(PPC::X2); else { // If the address is an externally defined symbol, a symbol with // common or externally available linkage, a function address, or a @@ -1875,7 +1887,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // ADDItocL(ADDIStocHA(%X2, GV), GV) // Either way, start with the ADDIStocHA: unsigned HighPartReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); // !GVar implies a function address. An external variable is one @@ -1884,11 +1896,11 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // on the "if" path here. if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); else // Otherwise generate the ADDItocL. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); } @@ -1906,21 +1918,21 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); if (isInt<16>(Imm)) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? 
PPC::LI : PPC::LI8), ResultReg) .addImm(Imm); else if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) .addImm(Hi); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) .addReg(TmpReg).addImm(Lo); } else // Just Hi bits. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) .addImm(Hi); @@ -1960,7 +1972,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg2; if (Imm) { TmpReg2 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); } else TmpReg2 = TmpReg1; @@ -1968,14 +1980,14 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg3, Hi, Lo; if ((Hi = (Remainder >> 16) & 0xFFFF)) { TmpReg3 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), TmpReg3).addReg(TmpReg2).addImm(Hi); } else TmpReg3 = TmpReg2; if ((Lo = Remainder & 0xFFFF)) { unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), ResultReg).addReg(TmpReg3).addImm(Lo); return ResultReg; } @@ -1987,6 +1999,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). 
unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + const ConstantInt *CI = cast(C); + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) @@ -2000,7 +2021,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { if (isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(CI->getSExtValue()); return ImmReg; } @@ -2049,7 +2070,7 @@ unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { if (SI != FuncInfo.StaticAllocaMap.end()) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(SI->second).addImm(0); return ResultReg; } @@ -2152,6 +2173,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? 
PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0ac2ced..d8f491f 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -353,9 +353,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { assert((isDarwinABI || isSVR4ABI) && "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); - // Prepare for frame info. - MCSymbol *FrameLabel = 0; - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. if (!isSVR4ABI) @@ -561,36 +558,37 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add the "machine moves" for the instructions we generated above, but in // reverse order. if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); - // Show update of SP. 
assert(NegFrameSize); - MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); if (HasFP) { unsigned Reg = MRI->getDwarfRegNum(FPReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } if (HasBP) { unsigned Reg = MRI->getDwarfRegNum(BPReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } if (MustSaveLR) { unsigned Reg = MRI->getDwarfRegNum(LRReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } - MCSymbol *ReadyLabel = 0; - // If there is a frame pointer, copy R1 into R31 if (HasFP) { BuildMI(MBB, MBBI, dl, OrInst, FPReg) @@ -598,19 +596,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(SPReg); if (needsFrameMoves) { - ReadyLabel = MMI.getContext().CreateTempSymbol(); - // Mark effective beginning of when frame pointer is ready. 
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - unsigned Reg = MRI->getDwarfRegNum(FPReg, true); - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } if (needsFrameMoves) { - MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel; - // Add callee saved registers to move list. const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { @@ -631,14 +627,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { - MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI->getDwarfRegNum(PPC::CR2, true), 8)); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI->getDwarfRegNum(Reg, true), Offset)); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 0df50e1..37c85b3 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -15,34 +15,220 @@ #include "PPCHazardRecognizers.h" #include "PPC.h" #include "PPCInstrInfo.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// PowerPC Scoreboard Hazard Recognizer -void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { +bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { + // FIXME: Move this. + if (isBCTRAfterSet(SU)) + return true; + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); if (!MCID) - // This is a PPC pseudo-instruction. - return; + return false; + + if (!MCID->mayLoad()) + return false; + + // SU is a load; for any predecessors in this dispatch group, that are stores, + // and with which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || !PredMCID->mayStore()) + continue; + + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + return false; + + if (!MCID->isBranch()) + return false; + + // SU is a branch; for any predecessors in this dispatch group, with which we + // have a data dependence and set the counter register, return true. 
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) + continue; + + if (SU->Preds[i].isCtrl()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +// FIXME: Remove this when we don't need this: +namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } + +// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. + +bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, + unsigned &NSlots) { + // FIXME: Indirectly, this information is contained in the itinerary, and + // we should derive it from there instead of separately specifying it + // here. + unsigned IIC = MCID->getSchedClass(); + switch (IIC) { + default: + NSlots = 1; + break; + case PPC::Sched::IIC_IntDivW: + case PPC::Sched::IIC_IntDivD: + case PPC::Sched::IIC_LdStLoadUpd: + case PPC::Sched::IIC_LdStLDU: + case PPC::Sched::IIC_LdStLFDU: + case PPC::Sched::IIC_LdStLFDUX: + case PPC::Sched::IIC_LdStLHA: + case PPC::Sched::IIC_LdStLHAU: + case PPC::Sched::IIC_LdStLWA: + case PPC::Sched::IIC_LdStSTDU: + case PPC::Sched::IIC_LdStSTFDU: + NSlots = 2; + break; + case PPC::Sched::IIC_LdStLoadUpdX: + case PPC::Sched::IIC_LdStLDUX: + case PPC::Sched::IIC_LdStLHAUX: + case PPC::Sched::IIC_LdStLWARX: + case PPC::Sched::IIC_LdStLDARX: + case PPC::Sched::IIC_LdStSTDUX: + case PPC::Sched::IIC_LdStSTDCX: + case PPC::Sched::IIC_LdStSTWCX: + case PPC::Sched::IIC_BrMCRX: // mtcr + // FIXME: Add sync/isync (here and in the itinerary). + NSlots = 4; + break; + } - ScoreboardHazardRecognizer::EmitInstruction(SU); + // FIXME: record-form instructions need a different itinerary class. 
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) + NSlots = 2; + + switch (IIC) { + default: + // All multi-slot instructions must come first. + return NSlots > 1; + case PPC::Sched::IIC_BrCR: // cr logicals + case PPC::Sched::IIC_SprMFCR: + case PPC::Sched::IIC_SprMFCRF: + case PPC::Sched::IIC_SprMTSPR: + return true; + } } ScheduleHazardRecognizer::HazardType -PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { +PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (Stalls == 0 && isLoadAfterStore(SU)) + return NoopHazard; + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); } -void PPCScoreboardHazardRecognizer::AdvanceCycle() { - ScoreboardHazardRecognizer::AdvanceCycle(); +bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + unsigned NSlots; + if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) + return true; + + return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); +} + +unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + // We only need to fill out a maximum of 5 slots here: The 6th slot could + // only be a second branch, and otherwise the next instruction will start a + // new group. + if (isLoadAfterStore(SU) && CurSlots < 6) { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If we're using a special group-terminating nop, then we need only one. 
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7) + return 1; + + return 5 - CurSlots; + } + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID) { + if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << + SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + + unsigned NSlots; + bool MustBeFirst = mustComeFirst(MCID, NSlots); + + // If this instruction must come first, but does not, then it starts a + // new group. + if (MustBeFirst && CurSlots) { + CurSlots = CurBranches = 0; + CurGroup.clear(); + } + + CurSlots += NSlots; + CurGroup.push_back(SU); + + if (MCID->isBranch()) + ++CurBranches; + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { + return ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { + llvm_unreachable("Bottom-up scheduling not supported"); } -void PPCScoreboardHazardRecognizer::Reset() { - ScoreboardHazardRecognizer::Reset(); +void PPCDispatchGroupSBHazardRecognizer::Reset() { + CurGroup.clear(); + CurSlots = CurBranches = 0; + return ScoreboardHazardRecognizer::Reset(); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If the group has now filled all of its slots, or if we're using a special + // group-terminating nop, the group is complete. 
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + CurSlots == 6) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + CurGroup.push_back(0); + ++CurSlots; + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 84b8e6d..6b7fe41 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,19 +21,30 @@ namespace llvm { -/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based -/// hazard recognizer for generic PPC processors. -class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { +/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for PPC ooo processors with dispatch-group hazards. +class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); public: - PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : - ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_), + CurSlots(0), CurBranches(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual bool ShouldPreferAnother(SUnit* SU); + virtual unsigned PreEmitNoops(SUnit *SU); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); + virtual void RecedeCycle(); virtual void Reset(); + virtual void EmitNoop(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 
6ba6af6..3bbc839 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -34,6 +35,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +// FIXME: Remove this once the bug has been fixed! +cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", +cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); + namespace llvm { void initializePPCDAGToDAGISelPass(PassRegistry&); } @@ -181,6 +186,12 @@ namespace { private: SDNode *SelectSETCC(SDNode *N); + + void PeepholePPC64(); + void PeepholdCROps(); + + bool AllUsersSelectZero(SDNode *N); + void SwapAllSelectUsers(SDNode *N); }; } @@ -261,11 +272,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { DebugLoc dl; if (PPCLowering.getPointerTy() == MVT::i32) { - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } else { - GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } @@ -561,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, Opc = PPC::FCMPUS; } else { assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); - Opc = PPC::FCMPUD; + Opc = PPCSubTarget.hasVSX() ? 
PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -629,7 +640,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, // only support the altivec types (v16i8, v8i16, v4i32, and v4f32). -static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { +static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC, + bool HasVSX) { switch (CC) { case ISD::SETEQ: case ISD::SETUEQ: @@ -643,7 +655,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { return PPC::VCMPEQUW; // v4f32 != v4f32 could be translate to unordered not equal else if (VecVT == MVT::v4f32) - return PPC::VCMPEQFP; + return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPEQDP; break; case ISD::SETLT: case ISD::SETGT: @@ -656,7 +670,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; else if (VecVT == MVT::v4f32) - return PPC::VCMPGTFP; + return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGTDP; break; case ISD::SETULT: case ISD::SETUGT: @@ -671,17 +687,23 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { break; case ISD::SETOEQ: if (VecVT == MVT::v4f32) - return PPC::VCMPEQFP; + return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPEQDP; break; case ISD::SETOLT: case ISD::SETOGT: case ISD::SETOLE: if (VecVT == MVT::v4f32) - return PPC::VCMPGTFP; + return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGTDP; break; case ISD::SETOGE: if (VecVT == MVT::v4f32) - return PPC::VCMPGEFP; + return HasVSX ? 
PPC::XVCMPGESP : PPC::VCMPGEFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGEDP; break; default: break; @@ -692,7 +714,7 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { // getVCmpEQInst: return the equal compare instruction for the specified vector // type. Since this is for altivec specific code, only support the altivec // types (v16i8, v8i16, v4i32, and v4f32). -static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) { switch (VecVT) { case MVT::v16i8: return PPC::VCMPEQUB; @@ -701,13 +723,14 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { case MVT::v4i32: return PPC::VCMPEQUW; case MVT::v4f32: - return PPC::VCMPEQFP; + return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; + case MVT::v2f64: + return PPC::XVCMPEQDP; default: llvm_unreachable("Invalid integer vector compare condition"); } } - SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; @@ -715,7 +738,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - if (isInt32Immediate(N->getOperand(1), Imm)) { + if (!PPCSubTarget.useCRBits() && + isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
// setcc op, 0 @@ -796,7 +820,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (LHS.getValueType().isVector()) { EVT VecVT = LHS.getValueType(); MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; - unsigned int VCmpInst = getVCmpInst(VT, CC); + unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX()); switch (CC) { case ISD::SETEQ: @@ -807,7 +831,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETONE: case ISD::SETUNE: { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLNOR : + PPC::VNOR, + VecVT, VCmp, VCmp); } case ISD::SETLT: case ISD::SETOLT: @@ -827,24 +853,31 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); } else { SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR : + PPC::VOR, + VecVT, VCmpGT, VCmpEQ); } } case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE: { SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? 
PPC::XXLOR : + PPC::VOR, + VecVT, VCmpLE, VCmpEQ); } default: llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); } } + if (PPCSubTarget.useCRBits()) + return 0; + bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); @@ -959,8 +992,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::SETCC: - return SelectSETCC(N); + case ISD::SETCC: { + SDNode *SN = SelectSETCC(N); + if (SN) + return SN; + break; + } case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); @@ -1122,7 +1159,21 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isMask_64(Imm64)) { SDValue Val = N->getOperand(0); MB = 64 - CountTrailingOnes_64(Imm64); - SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + SH = 0; + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. + if (Val.getOpcode() == ISD::SRL && + isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { + assert(Imm < 64 && "Illegal shift amount"); + Val = Val.getOperand(0); + SH = 64 - Imm; + } + + SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) }; return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); } // AND X, 0 -> 0, not "rlwinm 32". @@ -1179,11 +1230,39 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + // FIXME: Remove this once the ANDI glue bug is fixed: + case PPCISD::ANDIo_1_EQ_BIT: + case PPCISD::ANDIo_1_GT_BIT: { + if (!ANDIGlueBug) + break; + + EVT InVT = N->getOperand(0).getValueType(); + assert((InVT == MVT::i64 || InVT == MVT::i32) && + "Invalid input type for ANDIo_1_EQ_BIT"); + + unsigned Opcode = (InVT == MVT::i64) ? 
PPC::ANDIo8 : PPC::ANDIo; + SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, + N->getOperand(0), + CurDAG->getTargetConstant(1, InVT)), 0); + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue SRIdxVal = + CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? + PPC::sub_eq : PPC::sub_gt, MVT::i32); + + return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, + CR0Reg, SRIdxVal, + SDValue(AndI.getNode(), 1) /* glue */); + } case ISD::SELECT_CC: { ISD::CondCode CC = cast(N->getOperand(4))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); + // If this is a select of i1 operands, we'll pattern match it. + if (PPCSubTarget.useCRBits() && + N->getOperand(0).getValueType() == MVT::i1) + break; + // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast(N->getOperand(1))) @@ -1202,6 +1281,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + + if (N->getValueType(0) == MVT::i1) { + // An i1 select is: (c & t) | (!c & f). + bool Inv; + unsigned Idx = getCRIdxForSetCC(CC, Inv); + + unsigned SRI; + switch (Idx) { + default: llvm_unreachable("Invalid CC index"); + case 0: SRI = PPC::sub_lt; break; + case 1: SRI = PPC::sub_gt; break; + case 2: SRI = PPC::sub_eq; break; + case 3: SRI = PPC::sub_un; break; + } + + SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); + + SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, + CCBit, CCBit), 0); + SDValue C = Inv ? NotCCBit : CCBit, + NotC = Inv ? 
CCBit : NotCCBit; + + SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + C, N->getOperand(2)), 0); + SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + NotC, N->getOperand(3)), 0); + + return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); + } + unsigned BROpc = getPredicateForSetCC(CC); unsigned SelectCCOp; @@ -1220,6 +1329,50 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(BROpc) }; return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4); } + case ISD::VSELECT: + if (PPCSubTarget.hasVSX()) { + SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; + return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops, 3); + } + + break; + case ISD::VECTOR_SHUFFLE: + if (PPCSubTarget.hasVSX() && (N->getValueType(0) == MVT::v2f64 || + N->getValueType(0) == MVT::v2i64)) { + ShuffleVectorSDNode *SVN = cast(N); + + SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), + Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 
0 : 1); + unsigned DM[2]; + + for (int i = 0; i < 2; ++i) + if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) + DM[i] = 0; + else + DM[i] = 1; + + SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32); + + if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && + Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && + isa(Op1.getOperand(0))) { + LoadSDNode *LD = cast(Op1.getOperand(0)); + SDValue Base, Offset; + + if (LD->isUnindexed() && + SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { + SDValue Chain = LD->getChain(); + SDValue Ops[] = { Base, Offset, Chain }; + return CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops, 3); + } + } + + SDValue Ops[] = { Op1, Op2, DMV }; + return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops, 3); + } + + break; case PPCISD::BDNZ: case PPCISD::BDZ: { bool IsPPC64 = PPCSubTarget.isPPC64(); @@ -1244,8 +1397,30 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::BR_CC: { ISD::CondCode CC = cast(N->getOperand(1))->get(); + unsigned PCC = getPredicateForSetCC(CC); + + if (N->getOperand(2).getValueType() == MVT::i1) { + unsigned Opc; + bool Swap; + switch (PCC) { + default: llvm_unreachable("Unexpected Boolean-operand predicate"); + case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; + case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; + case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; + case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; + case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; + case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; + } + + SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, + N->getOperand(Swap ? 3 : 2), + N->getOperand(Swap ? 
2 : 3)), 0); + return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, + BitComp, N->getOperand(4), N->getOperand(0)); + } + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); - SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, + SDValue Ops[] = { getI32Imm(PCC), CondCode, N->getOperand(4), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4); } @@ -1288,8 +1463,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalValue *RealGValue = + GAlias ? GAlias->getAliasedGlobal() : GValue; const GlobalVariable *GVar = dyn_cast(RealGValue); assert((GVar || isa(RealGValue)) && "Unexpected global value subclass!"); @@ -1382,7 +1557,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } -/// PostProcessISelDAG - Perform some late peephole optimizations +/// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { @@ -1390,6 +1565,478 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { if (TM.getOptLevel() == CodeGenOpt::None) return; + PeepholePPC64(); + PeepholdCROps(); +} + +// Check if all users of this node will become isel where the second operand +// is the constant zero. If this is so, and if we can negate the condition, +// then we can flip the true and false operands. This will allow the zero to +// be folded with the isel so that we don't need to materialize a register +// containing zero. +bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { + // If we're not using isel, then this does not matter. 
+ if (!PPCSubTarget.hasISEL()) + return false; + + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (!User->isMachineOpcode()) + return false; + if (User->getMachineOpcode() != PPC::SELECT_I4 && + User->getMachineOpcode() != PPC::SELECT_I8) + return false; + + SDNode *Op2 = User->getOperand(2).getNode(); + if (!Op2->isMachineOpcode()) + return false; + + if (Op2->getMachineOpcode() != PPC::LI && + Op2->getMachineOpcode() != PPC::LI8) + return false; + + ConstantSDNode *C = dyn_cast(Op2->getOperand(0)); + if (!C) + return false; + + if (!C->isNullValue()) + return false; + } + + return true; +} + +void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { + SmallVector ToReplace; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + assert((User->getMachineOpcode() == PPC::SELECT_I4 || + User->getMachineOpcode() == PPC::SELECT_I8) && + "Must have all select users"); + ToReplace.push_back(User); + } + + for (SmallVector::iterator UI = ToReplace.begin(), + UE = ToReplace.end(); UI != UE; ++UI) { + SDNode *User = *UI; + SDNode *ResNode = + CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), + User->getValueType(0), User->getOperand(0), + User->getOperand(2), + User->getOperand(1)); + + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(User->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + ReplaceUses(User, ResNode); + } +} + +void PPCDAGToDAGISel::PeepholdCROps() { + bool IsModified; + do { + IsModified = false; + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + MachineSDNode *MachineNode = dyn_cast(I); + if (!MachineNode || MachineNode->use_empty()) + continue; + SDNode *ResNode = MachineNode; + + bool Op1Set = false, Op1Unset = false, + Op1Not = false, + Op2Set = false, Op2Unset = false, + Op2Not = false; + + 
unsigned Opcode = MachineNode->getMachineOpcode(); + switch (Opcode) { + default: break; + case PPC::CRAND: + case PPC::CRNAND: + case PPC::CROR: + case PPC::CRXOR: + case PPC::CRNOR: + case PPC::CREQV: + case PPC::CRANDC: + case PPC::CRORC: { + SDValue Op = MachineNode->getOperand(1); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op2Set = true; + else if (Op.getMachineOpcode() == PPC::CRUNSET) + Op2Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op2Not = true; + } + } // fallthrough + case PPC::BC: + case PPC::BCn: + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: { + SDValue Op = MachineNode->getOperand(0); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op1Set = true; + else if (Op.getMachineOpcode() == PPC::CRUNSET) + Op1Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op1Not = true; + } + } + break; + } + + bool SelectSwap = false; + switch (Opcode) { + default: break; + case PPC::CRAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x & x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set) + // 1 & y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // x & 1 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset || Op2Unset) + // x & 0 = 0 & y = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // ~x & y = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). + getOperand(0)); + else if (Op2Not) + // x & ~y = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRNAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // nand(x, x) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Set) + // nand(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // nand(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset || Op2Unset) + // nand(x, 0) = nand(0, y) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nand(x, ~y) = ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). 
+ getOperand(0), + MachineNode->getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CROR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x | x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set || Op2Set) + // x | 1 = 1 | y = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // 0 | y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // x | 0 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). + getOperand(0)); + else if (Op2Not) + // x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRXOR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // xor(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // xor(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // xor(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset) + // xor(0, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // xor(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // xor(~x, y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // xor(x, ~y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRNOR: + if (Op1Set || Op2Set) + // nor(1, y) -> 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // nor(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // nor(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // nor(~x, y) = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nor(x, ~y) = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). 
+ getOperand(0), + MachineNode->getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CREQV: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // eqv(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // eqv(1, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // eqv(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // eqv(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // eqv(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // eqv(~x, y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // eqv(x, ~y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRANDC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // andc(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // andc(1, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Unset || Op2Set) + // andc(0, y) = andc(x, 1) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Unset) + // andc(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // andc(~x, y) = ~(x | y) = nor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // andc(x, ~y) = x & y + ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0)), + SelectSwap = true; + break; + case PPC::CRORC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // orc(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set || Op2Unset) + // orc(1, y) = orc(x, 0) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Set) + // orc(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // orc(0, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Not) + // orc(~x, y) = ~(x & y) = nand(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // orc(x, ~y) = x | y + ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0)), + SelectSwap = true; + break; + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: + if (Op1Set) + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op1Unset) + ResNode = MachineNode->getOperand(2).getNode(); + else if (Op1Not) + ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), + SDLoc(MachineNode), + MachineNode->getValueType(0), + MachineNode->getOperand(0). 
+ getOperand(0), + MachineNode->getOperand(2), + MachineNode->getOperand(1)); + break; + case PPC::BC: + case PPC::BCn: + if (Op1Not) + ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : + PPC::BC, + SDLoc(MachineNode), + MVT::Other, + MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1), + MachineNode->getOperand(2)); + // FIXME: Handle Op1Set, Op1Unset here too. + break; + } + + // If we're inverting this node because it is used only by selects that + // we'd like to swap, then swap the selects before the node replacement. + if (SelectSwap) + SwapAllSelectUsers(MachineNode); + + if (ResNode != MachineNode) { + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(MachineNode->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + ReplaceUses(MachineNode, ResNode); + IsModified = true; + } + } + if (IsModified) + CurDAG->RemoveDeadNodes(); + } while (IsModified); +} + +void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) return; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8da5f05..32ac1dc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -46,6 +46,9 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); +// FIXME: Remove this once the bug has been fixed! 
+extern cl::opt ANDIGlueBug; + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -94,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); + if (Subtarget->useCRBits()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (isPPC64 || Subtarget->hasFPCVT()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + } else { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); + } + + // PowerPC does not support direct load / store of condition registers + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + + // FIXME: Remove this once the ANDI glue bug is fixed: + if (ANDIGlueBug) + setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i32, MVT::i1, Expand); + setTruncStoreAction(MVT::i16, MVT::i1, Expand); + setTruncStoreAction(MVT::i8, MVT::i1, Expand); + + addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); + } + // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. 
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); @@ -191,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - // PowerPC does not have Select - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + if (!Subtarget->useCRBits()) { + // PowerPC does not have Select + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + } // PowerPC wants to turn select_cc of FP into fsel when possible. setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit - setOperationAction(ISD::SETCC, MVT::i32, Custom); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + if (!Subtarget->useCRBits()) + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -445,7 +485,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, MVT::v4i32, + Subtarget->useCRBits() ? 
Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); @@ -464,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath) { + if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -491,6 +532,83 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); + + if (Subtarget->hasVSX()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::MUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); + + // Share the Altivec comparison restrictions. 
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand); + + setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); + + setOperationAction(ISD::LOAD, MVT::v2f64, Legal); + setOperationAction(ISD::STORE, MVT::v2f64, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); + + addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); + addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); + + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); + + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); + + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + + // Vector operation legalization checks the result type of + // SIGN_EXTEND_INREG, overall legalization checks the inner type. 
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + + addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); + } } if (Subtarget->has64BitSupport()) { @@ -522,9 +640,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); + if (Subtarget->useCRBits()) + setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + + if (Subtarget->useCRBits()) { + setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT_CC); + } + // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); @@ -545,6 +675,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128"); } + // With 32 condition bits, we don't need to sink (and duplicate) compares + // aggressively in CodeGenPrep. 
+ if (Subtarget->useCRBits()) + setHasMultipleConditionRegisters(); + setMinFunctionAlignment(2); if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); @@ -670,6 +805,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -688,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return MVT::i32; + return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } @@ -754,8 +890,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + if (N->getValueType(0) != MVT::v16i8) + return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); @@ -792,8 +928,8 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. 
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + if (N->getValueType(0) != MVT::v16i8) + return false; ShuffleVectorSDNode *SVOp = cast(N); @@ -1431,18 +1567,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } - if (!is64bit) - llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, - PtrVT, GOTReg, TGA); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + } else + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, - PtrVT, TGA, TPOffsetHi); + PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } @@ -1534,6 +1671,27 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); + if (Op.getValueType() == MVT::v2i64) { + // When the operands themselves are v2i64 values, we need to do something + // special because VSX has no underlying comparison operations for these. + if (Op.getOperand(0).getValueType() == MVT::v2i64) { + // Equality can be handled by casting to the legal type for Altivec + // comparisons, everything else needs to be expanded. 
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) { + return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, + DAG.getSetCC(dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), + CC)); + } + + return SDValue(); + } + + // We handle most of these in the usual way. + return Op; + } + // If we're comparing for equality to zero, expose the fact that this is // implented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. @@ -1922,7 +2080,7 @@ static const uint16_t *GetFPR() { /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { - unsigned ArgSize = ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -2020,6 +2178,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); + case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; @@ -2027,7 +2186,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = &PPC::F8RCRegClass; + if (PPCSubTarget.hasVSX()) + RC = &PPC::VSFRCRegClass; + else + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: @@ -2035,18 +2197,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v2f64: + case MVT::v2i64: + RC = &PPC::VSHRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + ValVT == MVT::i1 ? 
MVT::i32 : ValVT); + + if (ValVT == MVT::i1) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); @@ -2182,7 +2352,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } // Set the size that is at least reserved in caller of this function. Tail @@ -2246,6 +2416,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const uint16_t VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; @@ -2265,7 +2439,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); @@ -2275,7 +2449,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || - ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 || + ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) { if (isVarArg) { MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(ObjectVT, @@ -2333,7 +2508,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), @@ -2345,7 +2520,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg), false, false, 0); } @@ -2369,7 +2544,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2384,13 +2559,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. 
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2416,7 +2592,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ? + &PPC::VSFRCRegClass : + &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -2431,10 +2609,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? + MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : + MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -2581,6 +2763,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; @@ -2665,8 +2848,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? 
MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, - CurArgOffset), + MachinePointerInfo(FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2690,7 +2872,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, ArgOffset), + MachinePointerInfo(FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2705,11 +2887,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + + if (ObjectVT == MVT::i1) + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); + ++GPR_idx; } else { needsLoad = true; @@ -2725,7 +2912,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2888,7 +3075,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. 
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || - ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { + ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 || + ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) { if (!isVarArg && !isPPC64) { // Non-varargs Altivec parameters go after all the non-Altivec // parameters; handle those later so we know how much padding we need. @@ -2968,7 +3156,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Flags.isByVal()) return false; } - // Non PIC/GOT tail calls are supported. + // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; @@ -3706,6 +3894,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (VA.isRegLoc()) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); + seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -3863,6 +4054,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const uint16_t VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; + const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); @@ -3884,7 +4080,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // Promote integers to 64-bit values. - if (Arg.getValueType() == MVT::i32) { + if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); @@ -4008,6 +4204,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { @@ -4068,6 +4265,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: if (isVarArg) { // These go aligned on the stack, or in the corresponding R registers // when within range. The Darwin PPC ABI doc claims they also go in @@ -4091,7 +4290,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? + VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { @@ -4111,7 +4316,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // stack space allocated at the end. if (VR_idx != NumVRs) { // Doesn't have GPR space allocated. - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? 
+ VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, @@ -4339,9 +4549,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -4693,6 +4907,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, Op.getOperand(0), Op.getOperand(1)); } +SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 loads"); + + // First, load 8 bits into 32 bits, then truncate to 1 bit. + + SDLoc dl(Op); + LoadSDNode *LD = cast(Op); + + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand *MMO = LD->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, + BasePtr, MVT::i8, MMO); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); + + SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(1).getValueType() == MVT::i1 && + "Custom lowering only for i1 stores"); + + // First, zero extend to 32 bits, then use a truncating store to 8 bits. 
+ + SDLoc dl(Op); + StoreSDNode *ST = cast(Op); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + MachineMemOperand *MMO = ST->getMemOperand(); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); +} + +// FIXME: Remove this once the ANDI glue bug is fixed: +SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 results"); + + SDLoc DL(Op); + return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, + Op.getOperand(0)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4859,6 +5122,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + if (Op.getOperand(0).getValueType() == MVT::i1) + return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), + DAG.getConstantFP(1.0, Op.getValueType()), + DAG.getConstantFP(0.0, Op.getValueType())); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -5686,6 +5954,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int + // instructions), but for smaller types, we need to first extend up to v2i32 + // before doing going farther. 
+ if (Op.getValueType() == MVT::v2i64) { + EVT ExtVT = cast(Op.getOperand(1))->getVT(); + if (ExtVT != MVT::v2i32) { + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, + DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), + ExtVT.getVectorElementType(), 4))); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, + DAG.getValueType(MVT::v2i32)); + } + + return Op; + } + + return SDValue(); +} + SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -5792,6 +6084,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -5810,6 +6105,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. 
@@ -5915,8 +6211,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -5984,8 +6279,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6137,7 +6431,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, - llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible @@ -6357,9 +6651,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8)) { + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8)) { SmallVector Cond; - Cond.push_back(MI->getOperand(4)); + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8) + Cond.push_back(MI->getOperand(4)); + else + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI->getOperand(1)); DebugLoc dl = MI->getDebugLoc(); @@ -6371,9 +6671,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == 
PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { - - + MI->getOpcode() == PPC::SELECT_CC_VRRC || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -6387,23 +6690,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + if (MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { + BuildMI(BB, dl, TII->get(PPC::BC)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } else { + unsigned SelectPred = MI->getOperand(4).getImm(); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } // copy0MBB: // %FalseValue = ... 
@@ -6505,8 +6816,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: @@ -6576,8 +6886,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6726,6 +7035,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { + unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? + PPC::ANDIo8 : PPC::ANDIo; + bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? + &PPC::GPRCRegClass : + &PPC::G8RCRegClass); + + DebugLoc dl = MI->getDebugLoc(); + BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) + .addReg(MI->getOperand(1).getReg()).addImm(1); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), + MI->getOperand(0).getReg()) + .addReg(isEQ ? 
PPC::CR0EQ : PPC::CR0GT); } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6747,7 +7077,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || (VT == MVT::f64 && PPCSubTarget.hasFRE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || + (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -6809,7 +7140,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || + (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal sqrt, we need to find the zero of the function: @@ -6980,6 +7312,536 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { return false; } +SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + assert(PPCSubTarget.useCRBits() && + "Expecting to be tracking CR bits"); + // If we're tracking CR bits, we need to be careful that we don't have: + // trunc(binary-ops(zext(x), zext(y))) + // or + // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) + // such that we're unnecessarily moving things into GPRs when it would be + // better to keep them in CR bits. + + // Note that trunc here can be an actual i1 trunc, or can be the effective + // truncation that comes from a setcc or select_cc. 
+ if (N->getOpcode() == ISD::TRUNCATE && + N->getValueType(0) != MVT::i1) + return SDValue(); + + if (N->getOperand(0).getValueType() != MVT::i32 && + N->getOperand(0).getValueType() != MVT::i64) + return SDValue(); + + if (N->getOpcode() == ISD::SETCC || + N->getOpcode() == ISD::SELECT_CC) { + // If we're looking at a comparison, then we need to make sure that the + // high bits (all except for the first) don't matter the result. + ISD::CondCode CC = + cast(N->getOperand( + N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); + unsigned OpBits = N->getOperand(0).getValueSizeInBits(); + + if (ISD::isSignedIntSetCC(CC)) { + if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || + DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) + return SDValue(); + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (!DAG.MaskedValueIsZero(N->getOperand(0), + APInt::getHighBitsSet(OpBits, OpBits-1)) || + !DAG.MaskedValueIsZero(N->getOperand(1), + APInt::getHighBitsSet(OpBits, OpBits-1))) + return SDValue(); + } else { + // This is neither a signed nor an unsigned comparison, just make sure + // that the high bits are equal. + APInt Op1Zero, Op1One; + APInt Op2Zero, Op2One; + DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One); + DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One); + + // We don't really care about what is known about the first bit (if + // anything), so clear it in all masks prior to comparing them. + Op1Zero.clearBit(0); Op1One.clearBit(0); + Op2Zero.clearBit(0); Op2One.clearBit(0); + + if (Op1Zero != Op2Zero || Op1One != Op2One) + return SDValue(); + } + } + + // We now know that the higher-order bits are irrelevant, we just need to + // make sure that all of the intermediate operations are bit operations, and + // all inputs are extensions. 
+ if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC && + N->getOperand(0).getOpcode() != ISD::TRUNCATE && + N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && + N->getOperand(1).getOpcode() != ISD::AND && + N->getOperand(1).getOpcode() != ISD::OR && + N->getOperand(1).getOpcode() != ISD::XOR && + N->getOperand(1).getOpcode() != ISD::SELECT && + N->getOperand(1).getOpcode() != ISD::SELECT_CC && + N->getOperand(1).getOpcode() != ISD::TRUNCATE && + N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + SmallVector Inputs; + SmallVector BinOps, PromOps; + SmallPtrSet Visited; + + for (unsigned i = 0; i < 2; ++i) { + if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa(N->getOperand(i))) + Inputs.push_back(N->getOperand(i)); + else + BinOps.push_back(N->getOperand(i)); + + if (N->getOpcode() == ISD::TRUNCATE) + break; + } + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by extensions. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. 
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || + BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not an extension or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. 
+ if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == Inputs[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == Inputs[i] || + User->getOperand(1) == Inputs[i]) + return SDValue(); + } + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == PromOps[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == PromOps[i] || + User->getOperand(1) == PromOps[i]) + return SDValue(); + } + } + } + + // Replace all inputs with the extension operand. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constants may have users outside the cluster of to-be-promoted nodes, + // and so we need to replace those as we do the promotions. + if (isa(Inputs[i])) + continue; + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + } + + // Replace all operations (these are all the same, but have a different + // (i1) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. Any intermediate truncations or + // extensions disappear. 
+ while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + if (PromOp.getOpcode() == ISD::TRUNCATE || + PromOp.getOpcode() == ISD::SIGN_EXTEND || + PromOp.getOpcode() == ISD::ZERO_EXTEND || + PromOp.getOpcode() == ISD::ANY_EXTEND) { + if (!isa(PromOp.getOperand(0)) && + PromOp.getOperand(0).getValueType() != MVT::i1) { + // The operand is not yet ready (see comment below). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SDValue RepValue = PromOp.getOperand(0); + if (isa(RepValue)) + RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); + + DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); + continue; + } + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != MVT::i1) || + (!isa(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != MVT::i1)) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If there are any constant inputs, make sure they're replaced now. + for (unsigned i = 0; i < 2; ++i) + if (isa(Ops[C+i])) + Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, + Ops.data(), Ops.size())); + } + + // Now we're left with the initial truncation itself. + if (N->getOpcode() == ISD::TRUNCATE) + return N->getOperand(0); + + // Otherwise, this is a comparison. The operands to be compared have just + // changed type (to i1), but everything else is the same. 
+ return SDValue(N, 0); +} + +SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + // If we're tracking CR bits, we need to be careful that we don't have: + // zext(binary-ops(trunc(x), trunc(y))) + // or + // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) + // such that we're unnecessarily moving things into CR bits that can more + // efficiently stay in GPRs. Note that if we're not certain that the high + // bits are set as required by the final extension, we still may need to do + // some masking to get the proper behavior. + + // This same functionality is important on PPC64 when dealing with + // 32-to-64-bit extensions; these occur often when 32-bit values are used as + // the return values of functions. Because it is so similar, it is handled + // here as well. + + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return SDValue(); + + if (!((N->getOperand(0).getValueType() == MVT::i1 && + PPCSubTarget.useCRBits()) || + (N->getOperand(0).getValueType() == MVT::i32 && + PPCSubTarget.isPPC64()))) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC) + return SDValue(); + + SmallVector Inputs; + SmallVector BinOps(1, N->getOperand(0)), PromOps; + SmallPtrSet Visited; + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by truncations. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. 
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + isa(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not a truncation or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. 
+ if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == Inputs[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == Inputs[i] || + User->getOperand(1) == Inputs[i]) + return SDValue(); + } + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == PromOps[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == PromOps[i] || + User->getOperand(1) == PromOps[i]) + return SDValue(); + } + } + } + + unsigned PromBits = N->getOperand(0).getValueSizeInBits(); + bool ReallyNeedsExt = false; + if (N->getOpcode() != ISD::ANY_EXTEND) { + // If all of the inputs are not already sign/zero extended, then + // we'll still need to do that at the end. 
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa(Inputs[i])) + continue; + + unsigned OpBits = + Inputs[i].getOperand(0).getValueSizeInBits(); + assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); + + if ((N->getOpcode() == ISD::ZERO_EXTEND && + !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), + APInt::getHighBitsSet(OpBits, + OpBits-PromBits))) || + (N->getOpcode() == ISD::SIGN_EXTEND && + DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < + (OpBits-(PromBits-1)))) { + ReallyNeedsExt = true; + break; + } + } + } + + // Replace all inputs, either with the truncation operand, or a + // truncation or extension to the final output type. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constant inputs need to be replaced with the to-be-promoted nodes that + // use them because they might have users outside of the cluster of + // promoted nodes. + if (isa(Inputs[i])) + continue; + + SDValue InSrc = Inputs[i].getOperand(0); + if (Inputs[i].getValueType() == N->getValueType(0)) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); + else if (N->getOpcode() == ISD::SIGN_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); + } + + // Replace all operations (these are all the same, but have a different + // (promoted) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. 
+ while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != N->getValueType(0)) || + (!isa(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If this node has constant inputs, then they'll need to be promoted here. + for (unsigned i = 0; i < 2; ++i) { + if (!isa(Ops[C+i])) + continue; + if (Ops[C+i].getValueType() == N->getValueType(0)) + continue; + + if (N->getOpcode() == ISD::SIGN_EXTEND) + Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else + Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + } + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), + Ops.data(), Ops.size())); + } + + // Now we're left with the initial extension itself. + if (!ReallyNeedsExt) + return N->getOperand(0); + + // To zero extend, just mask off everything except for the first bit (in the + // i1 case). 
+ if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), + DAG.getConstant(APInt::getLowBitsSet( + N->getValueSizeInBits(0), PromBits), + N->getValueType(0))); + + assert(N->getOpcode() == ISD::SIGN_EXTEND && + "Invalid extension type"); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + SDValue ShiftCst = + DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); + return DAG.getNode(ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), + N->getOperand(0), ShiftCst), ShiftCst); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); @@ -7006,6 +7868,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); } break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return DAGCombineExtBoolTrunc(N, DCI); + case ISD::TRUNCATE: + case ISD::SETCC: + case ISD::SELECT_CC: + return DAGCombineTruncBoolExt(N, DCI); case ISD::FDIV: { assert(TM.Options.UnsafeFPMath && "Reciprocal estimates require UnsafeFPMath"); @@ -7204,7 +8074,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // you might suspect (sizeof(vector) bytes after the last requested // load), but rather sizeof(vector) - 1 bytes after the last // requested vector. The point of this is to avoid a page fault if the - // base address happend to be aligned. This works because if the base + // base address happened to be aligned. This works because if the base // address is aligned, then adding less than a full vector length will // cause the last vector in the sequence to be (re)loaded. Otherwise, // the next vector will be fetched as you might suspect was necessary. 
@@ -7421,6 +8291,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::BRCOND: { + SDValue Cond = N->getOperand(1); + SDValue Target = N->getOperand(2); + + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast(Cond.getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero) { + + // We now need to make the intrinsic dead (it cannot be instruction + // selected). + DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); + assert(Cond.getNode()->hasOneUse() && + "Counter decrement has more than one use"); + + return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, + N->getOperand(0), Target); + } + } + break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This @@ -7585,6 +8474,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { // suboptimal. return C_Memory; } + } else if (Constraint == "wc") { // individual CR bits. + return C_RegisterClass; + } else if (Constraint == "wa" || Constraint == "wd" || + Constraint == "wf" || Constraint == "ws") { + return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } @@ -7602,7 +8496,18 @@ PPCTargetLowering::getSingleConstraintMatchWeight( if (CallOperandVal == NULL) return CW_Default; Type *type = CallOperandVal->getType(); + // Look at the constraint type. + if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) + return CW_Register; // an individual CR bit. 
+ else if ((StringRef(constraint) == "wa" || + StringRef(constraint) == "wd" || + StringRef(constraint) == "wf") && + type->isVectorTy()) + return CW_Register; + else if (StringRef(constraint) == "ws" && type->isDoubleTy()) + return CW_Register; + switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); @@ -7658,6 +8563,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } + } else if (Constraint == "wc") { // an individual CR bit. + return std::make_pair(0U, &PPC::CRBITRCRegClass); + } else if (Constraint == "wa" || Constraint == "wd" || + Constraint == "wf") { + return std::make_pair(0U, &PPC::VSRCRegClass); + } else if (Constraint == "ws") { + return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = @@ -7793,6 +8705,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); @@ -7881,6 +8796,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; @@ -7894,8 +8810,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; - if (VT.getSimpleVT().isVector()) - return false; + if (VT.getSimpleVT().isVector()) { + if (PPCSubTarget.hasVSX()) { + if (VT != MVT::v2f64 && VT != MVT::v2i64) + return false; + } else { + return false; + } + } if (VT == MVT::ppcf128) return false; @@ -7923,6 +8845,15 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } +bool +PPCTargetLowering::shouldExpandBuildVectorWithShuffles( + EVT VT , unsigned DefinedValues) const { + if (VT == MVT::v2i64) + 
return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index df3af35..da6d4dc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -19,8 +19,8 @@ #include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -121,6 +121,12 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. EH_SJLJ_SETJMP, @@ -177,6 +183,10 @@ namespace llvm { CR6SET, CR6UNSET, + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS + /// on PPC32. + PPC32_GOT, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. @@ -457,7 +467,9 @@ namespace llvm { /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. 
- virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + virtual bool allowsUnalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + bool *Fast = 0) const; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -465,6 +477,11 @@ namespace llvm { /// expanded to fmul + fadd. virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; + // Should we expand the build vector with shuffles? + virtual bool + shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const; + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, @@ -509,6 +526,9 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -520,6 +540,7 @@ namespace llvm { SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, @@ -625,6 +646,8 @@ namespace llvm { SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue DAGCombineExtBoolTrunc(SDNode *N, 
DAGCombinerInfo &DCI) const; + SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 46db4fe..b71c09e 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -19,11 +19,13 @@ def s16imm64 : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def u16imm64 : Operand { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; } def s17imm64 : Operand { // This operand type is used for addis/lis to allow the assembler parser @@ -32,14 +34,11 @@ def s17imm64 : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def tocentry : Operand { let MIOperandInfo = (ops i64imm:$imm); } -def PPCTLSRegOperand : AsmOperandClass { - let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; - let RenderMethod = "addTLSRegOperands"; -} def tlsreg : Operand { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; @@ -80,15 +79,22 @@ def HI48_64 : SDNodeXForm, + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, + Requires<[In64BitMode]>; + def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>, Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>, + def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), 
(ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>, Requires<[In64BitMode]>; } } @@ -107,9 +113,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; } } @@ -119,41 +125,58 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", IIC_BrB, []>; // See Pat patterns below. def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func), - "bl $func", BrB, []>; + "bl $func", IIC_BrB, []>; def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>; } let Uses = [RM], isCodeGenOnly = 1 in { def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins tlscall:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins abscalltarget:$func), - "bla $func\n\tnop", BrB, + "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} 
${cond:reg}", BrB, []>, - Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in { + def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + } } } } // Interpretation64Bit +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let Interpretation64Bit = 1, isAsmParserOnly = 1 in +let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in +def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", IIC_BrB, []>; + // Calls def : Pat<(PPCcall (i64 tglobaladdr:$dst)), (BL8 tglobaladdr:$dst)>; @@ -199,16 +222,16 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), - "ldarx $rD, $ptr", LdStLDARX, + "ldarx $rD, $ptr", IIC_LdStLDARX, [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), - "stdcx. $rS, $dst", LdStSTDCX, + "stdcx. 
$rS, $dst", IIC_LdStSTDCX, [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -225,28 +248,23 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; -let isCodeGenOnly = 1 in { - let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in -def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In64BitMode]>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; - -} } // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), @@ -260,23 +278,23 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in { def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. 
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -298,24 +316,24 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { let Uses = [CTR8] in { def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), - "mfctr $rT", SprMFSPR>, + "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in { +let hasSideEffects = 1, Defs = [CTR8] in { let Pattern = [(int_ppc_mtctr i64:$rS)] in def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), - "mfspr $rT, 268", SprMFTB>, + "mfspr $rT, 268", IIC_SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; // Note that encoding mftb using mfspr is now the preferred form, // and has been since at least ISA v2.03. 
The mftb instruction has @@ -329,12 +347,12 @@ def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#D let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR8] in { def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } } // Interpretation64Bit @@ -346,213 +364,236 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. let Interpretation64Bit = 1 in { let neverHasSideEffects = 1 in { +let isCodeGenOnly = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm64SExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. 
+let isCommutable = 1 in { defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, i64:$rB))]>; +} // isCommutable defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, i64:$rB))]>; defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; +} // isCommutable defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; +} // let isCommutable = 1 // Logical ops with immediate. let Defs = [CR0] in { -def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andi. 
$dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andis. $dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } -def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; -def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; -def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; -def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; +let isCommutable = 1 in defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. 
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), - "add $rT, $rA, $rB", IntSimple, + "add $rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; +let isCommutable = 1 in defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; + let Defs = [CARRY] in def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>; def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i64:$rT, (ineg i64:$rA))]>; let Uses = [CARRY] in { +let isCommutable = 1 in defm ADDE8 : 
XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addme", "$rT, $rA", IntGeneral, + "addme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, -1))]>; defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, 0))]>; defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube -1, i64:$rA))]>; defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube 0, i64:$rA))]>; } +} // isCodeGenOnly +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. 
+let isAsmParserOnly = 1 in +def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, []>; +let isCommutable = 1 in { defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhd", "$rT, $rA, $rB", IntMulHW, + "mulhd", "$rT, $rA, $rB", IIC_IntMulHW, [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhdu", "$rT, $rA, $rB", IntMulHWU, + "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; +} // isCommutable } } // Interpretation64Bit let isCompare = 1, neverHasSideEffects = 1 in { def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; + "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64; def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; - def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm), - "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; - def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2), - "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; + "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm), + "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64; + def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "cmpldi $dst, $src1, $src2", + IIC_IntCompare>, isPPC64; } let neverHasSideEffects = 1 in { defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "sld", "$rA, $rS, $rB", IntRotateD, + "sld", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srd", "$rA, $rS, $rB", IntRotateD, + "srd", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsrl i64:$rS, 
i32:$rB))]>, isPPC64; defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srad", "$rA, $rS, $rB", IntRotateD, + "srad", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; } // Interpretation64Bit // For fast-isel: let isCodeGenOnly = 1 in { def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), - "extsb $rA, $rS", IntSimple, []>, isPPC64; + "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64; def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), - "extsh $rA, $rS", IntSimple, []>, isPPC64; + "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64; } // isCodeGenOnly for fast-isel defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext i32:$rS))]>, isPPC64; defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), - "sradi", "$rA, $rS, $SH", IntRotateDI, + "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), - "cntlzd", "$rA, $rS", IntGeneral, + "cntlzd", "$rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; def POPCNTD : XForm_11<31, 
506, (outs g8rc:$rA), (ins g8rc:$rS), - "popcntd $rA, $rS", IntGeneral, + "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), - "popcntw $rA, $rS", IntGeneral, + "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IntDivD, + "divd", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IntDivD, + "divdu", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; +let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulld", "$rT, $rA, $rB", IntMulHD, + "mulld", "$rT, $rA, $rB", IIC_IntMulHD, [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>; } @@ -560,7 +601,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -568,43 +609,53 @@ defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), // Rotate instructions. 
defm RLDCL : MDSForm_1r<30, 8, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDCR : MDSForm_1r<30, 9, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // For fast-isel: let isCodeGenOnly = 1 in def RLDICL_32_64 : MDForm_1<30, 0, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; defm RLDIC : MDForm_1r<30, 2, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>; +let isCommutable = 1 in { +// RLWIMI can be commuted if the rotate amount is zero. 
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; +} + let isSelect = 1 in def ISEL8 : AForm_4<31, 15, (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } // Interpretation64Bit } // neverHasSideEffects = 1 @@ -618,111 +669,111 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, + "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, + "lwax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1 in { def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, []>, isPPC64, + "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64, PPC970_DGroup_Cracked; def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, []>, isPPC64, + "lwax $rD, $src", 
IIC_LdStLHA, []>, isPPC64, PPC970_DGroup_Cracked; } // end fast-isel isCodeGenOnly // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLHAU, + "lwaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } } -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Zero extending loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, + "lhzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. 
let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -733,7 +784,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), // Full 8-byte loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), - "ld $rD, $src", LdStLD, + "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. 
// Otherwise, we need to create two instructions to form a 32-bit offset, @@ -754,30 +805,30 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", LdStLD, + "ld 2, 8($reg)", IIC_LdStLD, [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", LdStLD, + "ld 2, 40(1)", IIC_LdStLD, [(PPCtoc_restore)]>, isPPC64; } def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), - "ldx $rD, $src", LdStLD, + "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), - "ldbrx $rD, $src", LdStLoad, + "ldbrx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; let mayLoad = 1, neverHasSideEffects = 1 in { def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLDU, + "ldu $rD, $addr", IIC_LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLDU, + "ldux $rD, $addr", IIC_LdStLDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -860,78 +911,79 @@ def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; let PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Truncating stores. 
def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; } // Interpretation64Bit // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), - "std $rS, $dst", LdStSTD, + "std $rS, $dst", IIC_LdStSTD, [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), - "stdx $rS, $dst", LdStSTD, + "stdx $rS, $dst", IIC_LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), - "stdbrx $rS, $dst", LdStStore, + "stdbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } // Stores with Update (pre-inc). 
let PPC970_Unit = 2, mayStore = 1 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), - "stdu $rS, $dst", LdStSTDU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } // Interpretation64Bit +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), + "stdu $rS, $dst", IIC_LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + def STDUX : XForm_8<31, 181, (outs 
ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTDU, []>, + "stdux $rS, $dst", IIC_LdStSTDUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; } @@ -966,29 +1018,29 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), let PPC970_Unit = 3, neverHasSideEffects = 1, Uses = [RM] in { // FPU Operations. defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfid", "$frD, $frB", FPGeneral, + "fcfid", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), - "fctid", "$frD, $frB", FPGeneral, + "fctid", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), - "fctidz", "$frD, $frB", FPGeneral, + "fctidz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfidu", "$frD, $frB", FPGeneral, + "fcfidu", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfids", "$frD, $frB", FPGeneral, + "fcfids", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfidus", "$frD, $frB", FPGeneral, + "fcfidus", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiduz", "$frD, $frB", FPGeneral, + "fctiduz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwuz", "$frD, $frB", FPGeneral, + "fctiwuz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } @@ -1006,6 +1058,14 @@ def : Pat<(i64 (anyext i32:$in)), def : Pat<(i32 
(trunc i64:$in)), (EXTRACT_SUBREG $in, sub_32)>; +// Implement the 'not' operation with the NOR instruction. +// (we could use the default xori pattern, but nor has lower latency on some +// cores (such as the A2)). +def i64not : OutPatFrag<(ops node:$in), + (NOR8 $in, $in)>; +def : Pat<(not i64:$in), + (i64not $in)>; + // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), (LBZ8 iaddr:$src)>; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index a55abe3..2fd4a3e 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -164,7 +164,7 @@ def vecspltisw : PatLeaf<(build_vector), [{ // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. class VA1a_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VAForm_1a; // VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the @@ -172,7 +172,7 @@ class VA1a_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VA1a_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VAForm_1a; // VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two @@ -180,14 +180,14 @@ class VA1a_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, class VA1a_Int_Ty3 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VAForm_1a; // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. 
class VX1_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_1; // VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the @@ -195,7 +195,7 @@ class VX1_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VX1_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_1; // VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two @@ -203,13 +203,13 @@ class VX1_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, class VX1_Int_Ty3 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VXForm_1; // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. class VX2_Int_SP xo, string opc, Intrinsic IntID> : VXForm_2; // VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the @@ -217,7 +217,7 @@ class VX2_Int_SP xo, string opc, Intrinsic IntID> class VX2_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_2; //===----------------------------------------------------------------------===// @@ -229,116 +229,118 @@ let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStLoad /*FIXME*/, []>, + "dss $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStLoad /*FIXME*/, []>, + "dssall", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, 
gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), - "mfvscr $vD", LdStStore, + "mfvscr $vD", IIC_LdStStore, [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), - "mtvscr $vB", LdStLoad, + "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStLoad, + "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStLoad, + "lvehx $vD, $src", IIC_LdStLoad, [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStLoad, + "lvewx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStLoad, + "lvx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStLoad, + "lvxl $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStLoad, + "lvsl $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStLoad, + "lvsr $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. 
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStStore, + "stvebx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStStore, + "stvehx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStStore, + "stvewx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), - "stvx $rS, $dst", LdStStore, + "stvx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStStore, + "stvxl $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. +let isCommutable = 1 in { def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vmaddfp $vD, $vA, $vC, $vB", VecFP, + "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; // FIXME: The fma+fneg pattern won't match because fneg is not legal. 
def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, - (fneg v4f32:$vB))))]>; + (fneg v4f32:$vB))))]>; def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, v8i16>; def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; +} // isCommutable def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, v4i32, v4i32, v16i8>; @@ -346,23 +348,24 @@ def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH), - "vsldoi $vD, $vA, $vB, $SH", VecFP, + "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP, [(set v16i8:$vD, (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. +let isCommutable = 1 in { def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddfp $vD, $vA, $vB", VecFP, + "vaddfp $vD, $vA, $vB", IIC_VecFP, [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddubm $vD, $vA, $vB", VecGeneral, + "vaddubm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduhm $vD, $vA, $vB", VecGeneral, + "vadduhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduwm $vD, $vA, $vB", VecGeneral, + "vadduwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; @@ -372,30 +375,31 @@ def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; def VADDUBS : VX1_Int_Ty<512, 
"vaddubs", int_ppc_altivec_vaddubs, v16i8>; def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; - - +} // isCommutable + +let isCommutable = 1 in def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vand $vD, $vA, $vB", VecFP, + "vand $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vandc $vD, $vA, $vB", VecFP, + "vandc $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfsx $vD, $vB, $UIMM", VecFP, + "vcfsx $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfux $vD, $vB, $UIMM", VecFP, + "vcfux $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctsxs $vD, $vB, $UIMM", VecFP, + "vctsxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctuxs $vD, $vB, $UIMM", VecFP, + "vctuxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; @@ -404,25 +408,26 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), // to floating-point (sint_to_fp/uint_to_fp) conversions. 
let isCodeGenOnly = 1, VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfsx $vD, $vB, 0", VecFP, + "vcfsx $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), - "vctuxs $vD, $vB, 0", VecFP, + "vctuxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfux $vD, $vB, 0", VecFP, + "vcfux $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), - "vctsxs $vD, $vB, 0", VecFP, + "vctsxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; +let isCommutable = 1 in { def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>; def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; @@ -444,24 +449,25 @@ def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; +} // isCommutable def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghb $vD, $vA, $vB", VecFP, + "vmrghb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghh $vD, $vA, $vB", VecFP, + "vmrghh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghw $vD, $vA, $vB", VecFP, + "vmrghw 
$vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglb $vD, $vA, $vB", VecFP, + "vmrglb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglh $vD, $vA, $vB", VecFP, + "vmrglh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglw $vD, $vA, $vB", VecFP, + "vmrglw $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, @@ -477,6 +483,7 @@ def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, v4i32, v8i16, v4i32>; +let isCommutable = 1 in { def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, v8i16, v16i8>; def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, @@ -493,6 +500,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, v8i16, v16i8>; def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, v4i32, v8i16>; +} // isCommutable def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; @@ -504,16 +512,16 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubfp $vD, $vA, $vB", VecGeneral, + "vsubfp $vD, $vA, $vB", IIC_VecGeneral, [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsububm $vD, $vA, $vB", VecGeneral, + "vsububm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, 
v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuhm $vD, $vA, $vB", VecGeneral, + "vsubuhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuwm $vD, $vA, $vB", VecGeneral, + "vsubuwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; @@ -534,15 +542,17 @@ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vnor $vD, $vA, $vB", VecFP, + "vnor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, v4i32:$vB)))]>; +let isCommutable = 1 in { def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vor $vD, $vA, $vB", VecFP, + "vor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vxor $vD, $vA, $vB", VecFP, + "vxor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; +} // isCommutable def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; @@ -556,15 +566,15 @@ def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vspltb $vD, $vB, $UIMM", VecPerm, + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vsplth $vD, $vB, $UIMM", VecPerm, + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - 
"vspltw $vD, $vB, $UIMM", VecPerm, + "vspltw $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; @@ -580,13 +590,13 @@ def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisb $vD, $SIMM", VecPerm, + "vspltisb $vD, $SIMM", IIC_VecPerm, [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltish $vD, $SIMM", VecPerm, + "vspltish $vD, $SIMM", IIC_VecPerm, [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisw $vD, $SIMM", VecPerm, + "vspltisw $vD, $SIMM", IIC_VecPerm, [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. @@ -601,13 +611,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuhum $vD, $vA, $vB", VecFP, + "vpkuhum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuwum $vD, $vA, $vB", VecFP, + "vpkuwum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, @@ -631,10 +641,12 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, // Altivec Comparisons. class VCMP xo, string asmstr, ValueType Ty> - : VXRForm_1; class VCMPo xo, string asmstr, ValueType Ty> - : VXRForm_1 { let Defs = [CR6]; let RC = 1; @@ -676,24 +688,24 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllZerosV))]>; def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllZerosV))]>; def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllOnesV))]>; def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllOnesV))]>; def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 29233d4..7fed2c6 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -14,6 +14,8 @@ class I opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<32> Inst; + field bits<32> SoftFail = 0; + let Size = 4; bit PPC64 = 0; // Default value, override with isPPC64 @@ -67,6 +69,8 @@ class I2 opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<64> Inst; + field bits<64> SoftFail = 0; + let Size = 8; bit PPC64 = 0; // Default value, override with isPPC64 @@ -109,7 +113,7 @@ class IForm opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, // 1.7.2 B-Form class BForm opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I { + : I { bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
bits<3> CR; bits<14> BD; @@ -135,7 +139,7 @@ class BForm_1 opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, class BForm_2 opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I { + : I { bits<14> BD; let Inst{6-10} = bo; @@ -147,7 +151,7 @@ class BForm_2 opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, class BForm_3 opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I { + : I { bits<5> BO; bits<5> BI; bits<14> BD; @@ -159,6 +163,19 @@ class BForm_3 opcode, bit aa, bit lk, let Inst{31} = lk; } +class BForm_4 opcode, bits<5> bo, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I { + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.3 SC-Form class SCForm opcode, bits<1> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, @@ -258,6 +275,15 @@ class DForm_4_zero opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class DForm_4_fixedreg_zero opcode, bits<5> R, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : DForm_4 { + let A = R; + let B = R; + let C = 0; +} + class IForm_and_DForm_1 opcode1, bit aa, bit lk, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> @@ -567,6 +593,173 @@ class XForm_16b opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let A = 0; } +// XX*-Form (VSX) +class XX1Form opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = XT{5}; +} + +class XX2Form opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = 0; + let Inst{16-20} = XB{4-0}; + let 
Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2Form_1 opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<3> CR; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX2Form_2 opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-13} = 0; + let Inst{14-15} = D; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<3> CR; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-10} = 0; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX3Form_2 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = 0; + let Inst{22-23} = D; + let Inst{24-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_Rc opcode, bits<7> xo, dag OOL, dag IOL, 
string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = RC; + let Inst{22-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX4Form opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-25} = XC{4-0}; + let Inst{26-27} = xo; + let Inst{28} = XC{5}; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + // DCB_Form - Form X instruction, used for dcb* instructions. class DCB_Form xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> @@ -664,6 +857,12 @@ class XLForm_2_br opcode, bits<10> xo, bit lk, let BH = 0; } +class XLForm_2_br2 opcode, bits<10> xo, bits<5> bo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> + : XLForm_2 { + let BO = bo; + let BH = 0; +} class XLForm_2_ext opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 315ad04..939bbdc 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,16 +18,19 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -45,6 +48,13 @@ opt DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, static cl::opt DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden); +static cl::opt DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", +cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden); + +static cl::opt VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", +cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), +cl::Hidden); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -61,7 +71,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCScoreboardHazardRecognizer(II, DAG); + return new ScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); @@ -74,6 +84,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( const ScheduleDAG *DAG) const { unsigned Directive = TM.getSubtarget().getDarwinDirective(); + if (Directive == PPC::DIR_PWR7) + return new PPCDispatchGroupSBHazardRecognizer(II, DAG); + // Most subtargets use a PPC970 recognizer. 
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { @@ -82,7 +95,57 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( return new PPCHazardRecognizer970(TM); } - return new PPCScoreboardHazardRecognizer(II, DAG); + return new ScoreboardHazardRecognizer(II, DAG); +} + + +int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const { + int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, + UseMI, UseIdx); + + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + unsigned Reg = DefMO.getReg(); + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool IsRegCR; + if (TRI->isVirtualRegister(Reg)) { + const MachineRegisterInfo *MRI = + &DefMI->getParent()->getParent()->getRegInfo(); + IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || + MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); + } else { + IsRegCR = PPC::CRRCRegClass.contains(Reg) || + PPC::CRBITRCRegClass.contains(Reg); + } + + if (UseMI->isBranch() && IsRegCR) { + if (Latency < 0) + Latency = getInstrLatency(ItinData, DefMI); + + // On some cores, there is an additional delay between writing to a condition + // register, and using it from a branch. + unsigned Directive = TM.getSubtarget().getDarwinDirective(); + switch (Directive) { + default: break; + case PPC::DIR_7400: + case PPC::DIR_750: + case PPC::DIR_970: + case PPC::DIR_E5500: + case PPC::DIR_PWR4: + case PPC::DIR_PWR5: + case PPC::DIR_PWR5X: + case PPC::DIR_PWR6: + case PPC::DIR_PWR6X: + case PPC::DIR_PWR7: + Latency += 2; + break; + } + } + + return Latency; } // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. 
@@ -110,7 +173,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::LFS: case PPC::LFD: case PPC::RESTORE_CR: + case PPC::RESTORE_CRBIT: case PPC::LVX: + case PPC::LXVD2X: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -134,7 +199,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::STFS: case PPC::STFD: case PPC::SPILL_CR: + case PPC::SPILL_CRBIT: case PPC::STVX: + case PPC::STXVD2X: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -156,7 +223,9 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI && - MI->getOpcode() != PPC::RLWIMIo) + MI->getOpcode() != PPC::RLWIMIo && + MI->getOpcode() != PPC::RLWIMI8 && + MI->getOpcode() != PPC::RLWIMI8o) return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. @@ -174,6 +243,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); unsigned Reg2 = MI->getOperand(2).getReg(); + unsigned SubReg1 = MI->getOperand(1).getSubReg(); + unsigned SubReg2 = MI->getOperand(2).getSubReg(); bool Reg1IsKill = MI->getOperand(1).isKill(); bool Reg2IsKill = MI->getOperand(2).isKill(); bool ChangeReg0 = false; @@ -183,6 +254,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Must be two address instruction! 
assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); + assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); Reg2IsKill = false; ChangeReg0 = true; } @@ -203,10 +275,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { .addImm((MB-1) & 31); } - if (ChangeReg0) + if (ChangeReg0) { MI->getOperand(0).setReg(Reg2); + MI->getOperand(0).setSubReg(SubReg2); + } MI->getOperand(2).setReg(Reg1); MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setSubReg(SubReg1); + MI->getOperand(1).setSubReg(SubReg2); MI->getOperand(2).setIsKill(Reg1IsKill); MI->getOperand(1).setIsKill(Reg2IsKill); @@ -216,13 +292,37 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } +bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + // For VSX A-Type FMA instructions, it is the first two operands that can be + // commuted, however, because the non-encoded tied input operand is listed + // first, the operands to swap are actually the second and third. + + int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); + if (AltOpc == -1) + return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + + SrcOpIdx1 = 2; + SrcOpIdx2 = 3; + return true; +} + void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { + // This function is used for scheduling, and the nop wanted here is the type + // that terminates dispatch groups on the POWER cores. + unsigned Directive = TM.getSubtarget().getDarwinDirective(); + unsigned Opcode; + switch (Directive) { + default: Opcode = PPC::NOP; break; + case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; + case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; + } + DebugLoc DL; - BuildMI(MBB, MI, DL, get(PPC::NOP)); + BuildMI(MBB, MI, DL, get(Opcode)); } - // Branch analysis. 
// Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. @@ -263,6 +363,22 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; + } else if (LastInst->getOpcode() == PPC::BC) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(LastInst->getOperand(0)); + return false; + } else if (LastInst->getOpcode() == PPC::BCn) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + Cond.push_back(LastInst->getOperand(0)); + return false; } else if (LastInst->getOpcode() == PPC::BDNZ8 || LastInst->getOpcode() == PPC::BDNZ) { if (!LastInst->getOperand(0).isMBB()) @@ -310,6 +426,26 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; + } else if (SecondLastInst->getOpcode() == PPC::BC && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (SecondLastInst->getOpcode() == PPC::BCn && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + 
Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || SecondLastInst->getOpcode() == PPC::BDNZ) && LastInst->getOpcode() == PPC::B) { @@ -367,6 +503,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { --I; } if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 0; @@ -379,6 +516,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I == MBB.begin()) return 1; --I; if (I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 1; @@ -408,9 +546,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); return 1; } @@ -419,9 +561,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? 
PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } @@ -506,6 +652,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; + case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } unsigned FirstReg = SwapOps ? FalseReg : TrueReg, @@ -534,6 +682,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + // We can end up with self copies and similar things as a result of VSX copy + // legalization. Promote them here. 
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (PPC::F8RCRegClass.contains(DestReg) && + PPC::VSLRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && SrcReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + DestReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(DestReg) && + PPC::VSHRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && SrcReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + DestReg = SuperReg; + } else if (PPC::F8RCRegClass.contains(SrcReg) && + PPC::VSLRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && DestReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + SrcReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(SrcReg) && + PPC::VSHRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && DestReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + SrcReg = SuperReg; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; @@ -545,6 +734,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::MCRF; else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::VOR; + else if (PPC::VSRCRegClass.contains(DestReg, SrcReg)) + // There are two different ways this can be done: + // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only + // issue in VSU pipeline 0. + // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but + // can go to either pipeline. + // We'll always use xxlor here, because in practically all cases where + // copies are generated, they are close enough to some use that the + // lower-latency form is preferable. 
+ Opc = PPC::XXLOR; + else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::XXLORf; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -570,12 +771,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // update isStoreToStackSlot. DebugLoc DL; - if (PPC::GPRCRegClass.hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) .addReg(SrcReg, getKillRegState(isKill)), @@ -597,45 +800,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - // FIXME: We use CRi here because there is no mtcrf on a bit. Since the - // backend currently only uses CR1EQ as an individual bit, this should - // not cause any bug. If we need other uses of CR bits, the following - // code may be invalid. 
- unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on 
Darwin"); @@ -695,10 +882,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // Note: If additional load instructions are added here, // update isLoadFromStackSlot. - if (PPC::GPRCRegClass.hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), DestReg), FrameIdx)); - } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC) || + PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { @@ -713,40 +902,22 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - - unsigned Reg = 0; - if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || - DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || - DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || - DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || - DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || - DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || - DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || - DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || - DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) - Reg = PPC::CR7; - - return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, 
NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CRBIT), DestReg), + FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -933,9 +1104,17 @@ bool PPCInstrInfo::PredicateInstruction( MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); - } else { + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI->setDesc(get(PPC::BCLR)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(PPC::BCLRn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else { + MI->setDesc(get(PPC::BCCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); } @@ -947,6 +1126,22 @@ bool PPCInstrInfo::PredicateInstruction( MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? 
PPC::BDZ8 : PPC::BDZ))); + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } else { MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); MI->RemoveOperand(0); @@ -966,8 +1161,23 @@ bool PPCInstrInfo::PredicateInstruction( bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : - (setLR ? PPC::BCCTRL : PPC::BCCTR))); + + if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : + (setLR ? PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : + (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } + + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) : + (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); @@ -1152,8 +1362,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, if (equalityOnly) { // We need to check the uses of the condition register in order to reject // non-equality comparisons. 
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), - IE = MRI->use_end(); I != IE; ++I) { + for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg), + IE = MRI->use_instr_end(); I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { unsigned Pred = UseMI->getOperand(0).getImm(); @@ -1175,8 +1385,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); I != EL; ++I) { bool FoundUse = false; - for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg), - JE = MRI->use_end(); J != JE; ++J) + for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg), + JE = MRI->use_instr_end(); J != JE; ++J) if (&*J == &*I) { FoundUse = true; break; @@ -1285,15 +1495,16 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, } if (ShouldSwap) - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), - IE = MRI->use_end(); I != IE; ++I) { + for (MachineRegisterInfo::use_instr_iterator + I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end(); + I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); assert((!equalityOnly || Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) && "Invalid predicate for equality-only optimization"); - PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)), + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), PPC::getSwappedPredicate(Pred))); } else if (UseMI->getOpcode() == PPC::ISEL || UseMI->getOpcode() == PPC::ISEL8) { @@ -1306,7 +1517,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, else if (NewSubReg == PPC::sub_gt) NewSubReg = PPC::sub_lt; - SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)), + SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)), NewSubReg)); } else // We need to abort on a user we don't 
understand. return false; @@ -1318,7 +1529,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, CmpInstr->eraseFromParent(); MachineBasicBlock::iterator MII = MI; - BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(), + BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(), get(TargetOpcode::COPY), CRReg) .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); @@ -1363,26 +1574,497 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, /// instruction may be. This returns the maximum number of bytes. /// unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - case PPC::INLINEASM: { // Inline Asm: Variable size. + unsigned Opcode = MI->getOpcode(); + + if (Opcode == PPC::INLINEASM) { const MachineFunction *MF = MI->getParent()->getParent(); const char *AsmStr = MI->getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); - } - case PPC::PROLOG_LABEL: - case PPC::EH_LABEL: - case PPC::GC_LABEL: - case PPC::DBG_VALUE: - return 0; - case PPC::BL8_NOP: - case PPC::BLA8_NOP: - return 8; - default: - return 4; // PowerPC instructions are all 4 bytes + } else { + const MCInstrDesc &Desc = get(Opcode); + return Desc.getSize(); } } #undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-vsx-fma-mutate" + +namespace { + // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers + // (Altivec and scalar floating-point registers), we need to transform the + // copies into subregister copies with other restrictions. 
+  struct PPCVSXFMAMutate : public MachineFunctionPass {
+    static char ID;
+    PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+      initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+    }
+
+    // Live-interval analysis for the function being processed; assigned at
+    // the top of runOnMachineFunction.
+    LiveIntervals *LIS;
+
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    // Rewrite A-type VSX FMAs in MBB into their M-type alternate (as given by
+    // PPC::getAltVSXFMAOpcode) when the addend comes from a full COPY in the
+    // same block and one product operand is killed by the FMA. The copy is
+    // erased and the live intervals are patched up by hand. Returns true if
+    // any instruction in the block was changed.
+    bool processBlock(MachineBasicBlock &MBB) {
+      bool Changed = false;
+
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+           I != IE; ++I) {
+        MachineInstr *MI = I;
+
+        // The default (A-type) VSX FMA form kills the addend (it is taken from
+        // the target register, which is then updated to reflect the result of
+        // the FMA). If the instruction, however, kills one of the registers
+        // used for the product, then we can use the M-form instruction (which
+        // will take that value from the to-be-defined register).
+
+        int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+        if (AltOpc == -1)
+          continue;
+
+        // This pass is run after register coalescing, and so we're looking for
+        // a situation like this:
+        //   ...
+        //   %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9
+        //   %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16,
+        //                         %RM; VSLRC:%vreg5,%vreg17,%vreg16
+        //   ...
+        //   %vreg9 = XSMADDADP %vreg9, %vreg17, %vreg19,
+        //                         %RM; VSLRC:%vreg9,%vreg17,%vreg19
+        //   ...
+        // Where we can eliminate the copy by changing from the A-type to the
+        // M-type instruction. Specifically, for this example, this means:
+        //   %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16,
+        //                         %RM; VSLRC:%vreg5,%vreg17,%vreg16
+        // is replaced by:
+        //   %vreg16 = XSMADDMDP %vreg16, %vreg17, %vreg9,
+        //                         %RM; VSLRC:%vreg16,%vreg17,%vreg9
+        // and we remove: %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+        SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+        VNInfo *AddendValNo =
+          LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+
+        // valueIn() yields null when no value of the addend register is live
+        // into the FMA; there is no defining copy to fold in that case, and
+        // AddendValNo->def below must not be evaluated.
+        if (!AddendValNo)
+          continue;
+
+        MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+        // The addend and this instruction must be in the same block.
+
+        if (!AddendMI || AddendMI->getParent() != MI->getParent())
+          continue;
+
+        // The addend must be a full copy within the same register class.
+
+        if (!AddendMI->isFullCopy())
+          continue;
+
+        unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+          if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+              MRI.getRegClass(AddendSrcReg))
+            continue;
+        } else {
+          // If AddendSrcReg is a physical register, make sure the destination
+          // register class contains it.
+          if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+                ->contains(AddendSrcReg))
+            continue;
+        }
+
+        // In theory, there could be other uses of the addend copy before this
+        // fma. We could deal with this, but that would require additional
+        // logic below and I suspect it will not occur in any relevant
+        // situations.
+        bool OtherUsers = false;
+        for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+             J != JE; --J)
+          if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+            OtherUsers = true;
+            break;
+          }
+
+        if (OtherUsers)
+          continue;
+
+        // Find one of the product operands that is killed by this instruction.
+
+        unsigned KilledProdOp = 0, OtherProdOp = 0;
+        if (LIS->getInterval(MI->getOperand(2).getReg())
+                     .Query(FMAIdx).isKill()) {
+          KilledProdOp = 2;
+          OtherProdOp  = 3;
+        } else if (LIS->getInterval(MI->getOperand(3).getReg())
+                     .Query(FMAIdx).isKill()) {
+          KilledProdOp = 3;
+          OtherProdOp  = 2;
+        }
+
+        // If there are no killed product operands, then this transformation is
+        // likely not profitable.
+        if (!KilledProdOp)
+          continue;
+
+        // In order to replace the addend here with the source of the copy,
+        // it must still be live here.
+        if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
+          continue;
+
+        // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+        unsigned AddReg = AddendMI->getOperand(1).getReg();
+        unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+        unsigned OtherProdReg  = MI->getOperand(OtherProdOp).getReg();
+
+        unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+        unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+        unsigned OtherProdSubReg  = MI->getOperand(OtherProdOp).getSubReg();
+
+        bool AddRegKill = AddendMI->getOperand(1).isKill();
+        bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+        bool OtherProdRegKill  = MI->getOperand(OtherProdOp).isKill();
+
+        bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+        bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+        bool OtherProdRegUndef  = MI->getOperand(OtherProdOp).isUndef();
+
+        unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+        assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+               "Addend copy not tied to old FMA output!");
+
+        DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+        // Operand layout after the rewrite: 0/1 = killed product register
+        // (result and tied input), 2 = other product register, 3 = source of
+        // the erased copy.
+        MI->getOperand(0).setReg(KilledProdReg);
+        MI->getOperand(1).setReg(KilledProdReg);
+        MI->getOperand(3).setReg(AddReg);
+        MI->getOperand(2).setReg(OtherProdReg);
+
+        MI->getOperand(0).setSubReg(KilledProdSubReg);
+        MI->getOperand(1).setSubReg(KilledProdSubReg);
+        MI->getOperand(3).setSubReg(AddSubReg);
+        MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+        MI->getOperand(1).setIsKill(KilledProdRegKill);
+        MI->getOperand(3).setIsKill(AddRegKill);
+        MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+        MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+        MI->getOperand(3).setIsUndef(AddRegUndef);
+        MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+        MI->setDesc(TII->get(AltOpc));
+
+        DEBUG(dbgs() << " -> " << *MI);
+
+        // The killed product operand was killed here, so we can reuse it now
+        // for the result of the fma.
+
+        LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+        VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+        for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+             UI != UE;) {
+          MachineOperand &UseMO = *UI;
+          MachineInstr *UseMI = UseMO.getParent();
+          // Advance before rewriting: setReg() moves UseMO onto another
+          // register's use list.
+          ++UI;
+
+          // Don't replace the result register of the copy we're about to erase.
+          if (UseMI == AddendMI)
+            continue;
+
+          UseMO.setReg(KilledProdReg);
+          UseMO.setSubReg(KilledProdSubReg);
+        }
+
+        // Extend the live intervals of the killed product operand to hold the
+        // fma result.
+
+        LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+        for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+             AI != AE; ++AI) {
+          // Don't add the segment that corresponds to the original copy.
+          if (AI->valno == AddendValNo)
+            continue;
+
+          VNInfo *NewFMAValNo =
+            NewFMAInt.getNextValue(AI->start,
+                                   LIS->getVNInfoAllocator());
+
+          NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+                                                     NewFMAValNo));
+        }
+        DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+        FMAInt.removeValNo(FMAValNo);
+        DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+        // Remove the (now unused) copy.
+
+        DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+        LIS->RemoveMachineInstrFromMaps(AddendMI);
+        AddendMI->eraseFromParent();
+
+        Changed = true;
+      }
+
+      return Changed;
+    }
+
+public:
+    // Caches the analysis and target hooks, then runs processBlock over every
+    // basic block of MF. Does nothing (and reports no change) when
+    // DisableVSXFMAMutate is set.
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      LIS = &getAnalysis<LiveIntervals>();
+
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      if (DisableVSXFMAMutate)
+        return Changed;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++;
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    // Requires and preserves the same two analyses that are declared as pass
+    // dependencies in the INITIALIZE_PASS_DEPENDENCY list below.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<LiveIntervals>();
+      AU.addPreserved<LiveIntervals>();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+                      "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+                    "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+  void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+  // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+  // (Altivec and scalar floating-point registers), we need to transform the
+  // copies into subregister copies with other restrictions.
+  struct PPCVSXCopy : public MachineFunctionPass {
+    static char ID;
+    PPCVSXCopy() : MachineFunctionPass(ID) {
+      initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+    }
+
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+    // True when Reg belongs to RC: for a virtual register its assigned class
+    // must be RC or a subclass of RC; for any other register RC must contain
+    // it directly.
+    bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+                      MachineRegisterInfo &MRI) {
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        return RC->hasSubClassEq(MRI.getRegClass(Reg));
+      } else if (RC->contains(Reg)) {
+        return true;
+      }
+
+      return false;
+    }
+
+    bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+    }
+
+    bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+    }
+
+    bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+    }
+
+protected:
+    // Rewrites every full COPY in MBB that crosses between a VSRC register
+    // and a register outside VSRC. Returns true if any copy was rewritten.
+    bool processBlock(MachineBasicBlock &MBB) {
+      bool Changed = false;
+
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+           I != IE; ++I) {
+        MachineInstr *MI = I;
+        if (!MI->isFullCopy())
+          continue;
+
+        MachineOperand &DstMO = MI->getOperand(0);
+        MachineOperand &SrcMO = MI->getOperand(1);
+
+        if ( IsVSReg(DstMO.getReg(), MRI) &&
+            !IsVSReg(SrcMO.getReg(), MRI)) {
+          // This is a copy *to* a VSX register from a non-VSX register.
+          Changed = true;
+
+          // Queried once; the source register is not modified until the
+          // setReg() call at the end of this branch.
+          const bool SrcIsVR = IsVRReg(SrcMO.getReg(), MRI);
+
+          const TargetRegisterClass *SrcRC =
+            SrcIsVR ? &PPC::VSHRCRegClass :
+                      &PPC::VSLRCRegClass;
+          assert((IsF8Reg(SrcMO.getReg(), MRI) || SrcIsVR) &&
+                 "Unknown source for a VSX copy");
+
+          unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+          BuildMI(MBB, MI, MI->getDebugLoc(),
+                  TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+            .addImm(1) // add 1, not 0, because there is no implicit clearing
+                       // of the high bits.
+            .addOperand(SrcMO)
+            .addImm(SrcIsVR ? PPC::sub_128 :
+                              PPC::sub_64);
+
+          // The source of the original copy is now the new virtual register.
+          SrcMO.setReg(NewVReg);
+        } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+                    IsVSReg(SrcMO.getReg(), MRI)) {
+          // This is a copy *from* a VSX register to a non-VSX register.
+          Changed = true;
+
+          // Queried once; this branch never modifies the destination operand.
+          const bool DstIsVR = IsVRReg(DstMO.getReg(), MRI);
+
+          const TargetRegisterClass *DstRC =
+            DstIsVR ? &PPC::VSHRCRegClass :
+                      &PPC::VSLRCRegClass;
+          assert((IsF8Reg(DstMO.getReg(), MRI) || DstIsVR) &&
+                 "Unknown destination for a VSX copy");
+
+          // Copy the VSX value into a new VSX register of the correct subclass.
+          unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+          BuildMI(MBB, MI, MI->getDebugLoc(),
+                  TII->get(TargetOpcode::COPY), NewVReg)
+            .addOperand(SrcMO);
+
+          // Transform the original copy into a subregister extraction copy.
+          SrcMO.setReg(NewVReg);
+          SrcMO.setSubReg(DstIsVR ? PPC::sub_128 :
+                                    PPC::sub_64);
+        }
+      }
+
+      return Changed;
+    }
+
+public:
+    // Caches the target hooks and runs processBlock over every basic block.
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++;
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+                "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
+
+namespace llvm {
+  void initializePPCVSXCopyCleanupPass(PassRegistry&);
+}
+
+namespace {
+  // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
+  // registers (mostly because the ABI code still places all values into the
+  // "traditional" floating-point and vector registers). Remove them here.
+  struct PPCVSXCopyCleanup : public MachineFunctionPass {
+    static char ID;
+    PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
+      initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
+    }
+
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    // Erases every XXLOR in MBB whose destination and both source operands
+    // name the same register. Returns true if anything was erased.
+    bool processBlock(MachineBasicBlock &MBB) {
+      bool Changed = false;
+
+      // Collect first and erase afterwards so the block iterator is never
+      // left pointing at a deleted instruction.
+      // NOTE(review): the inline capacity was lost in extraction; 4 is a
+      // guess and only affects when the vector spills to the heap - confirm
+      // against the upstream commit.
+      SmallVector<MachineInstr *, 4> ToDelete;
+      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+           I != IE; ++I) {
+        MachineInstr *MI = I;
+        if (MI->getOpcode() == PPC::XXLOR &&
+            MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
+            MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
+          ToDelete.push_back(MI);
+      }
+
+      if (!ToDelete.empty())
+        Changed = true;
+
+      for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
+        DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
+        ToDelete[i]->eraseFromParent();
+      }
+
+      return Changed;
+    }
+
+public:
+    // Caches the target hooks and runs processBlock over every basic block.
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++;
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
+                "PowerPC VSX Copy Cleanup", false, false)
+
+char PPCVSXCopyCleanup::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
+
+#undef DEBUG_TYPE
 #define DEBUG_TYPE "ppc-early-ret"
 STATISTIC(NumBCLR, "Number of early conditional returns");
 STATISTIC(NumBLR, "Number of early returns");
@@ -1424,7 +2106,7 @@ protected:
           if (J->getOpcode() == PPC::B) {
             if (J->getOperand(0).getMBB() == &ReturnMBB) {
               // This is an unconditional branch to the return. Replace the
-              // branch with a blr.
+              // branch with a blr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR)); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); @@ -1436,7 +2118,7 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR)) + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) .addImm(J->getOperand(0).getImm()) .addReg(J->getOperand(1).getReg()); MachineBasicBlock::iterator K = J--; @@ -1445,6 +2127,20 @@ protected: ++NumBCLR; continue; } + } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) { + if (J->getOperand(1).getMBB() == &ReturnMBB) { + // This is a conditional branch to the return. Replace the branch + // with a bclr. + BuildMI(**PI, J, J->getDebugLoc(), + TII->get(J->getOpcode() == PPC::BC ? + PPC::BCLR : PPC::BCLRn)) + .addReg(J->getOperand(0).getReg()); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBCLR; + continue; + } } else if (J->isBranch()) { if (J->isIndirectBranch()) { if (ReturnMBB.hasAddressTaken()) @@ -1466,7 +2162,7 @@ protected: if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB)) OtherReference = true; - // Predecessors are stored in a vector and can't be removed here. + // Predecessors are stored in a vector and can't be removed here. 
if (!OtherReference && BlockChanged) { PredToRemove.push_back(*PI); } @@ -1509,7 +2205,7 @@ public: return Changed; for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + MachineBasicBlock &B = *I++; if (processBlock(B)) Changed = true; } @@ -1529,4 +2225,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, char PPCEarlyReturn::ID = 0; FunctionPass* llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } - diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index f140c41..3c8117c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,18 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, + UseNode, UseIdx); + } + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const; @@ -107,6 +119,9 @@ public: // rotate amt is zero. We also have to munge the immediates a bit. 
virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const; + virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 2bd3aad..1d984ab 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -99,6 +99,8 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; +def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; + def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, [SDNPMayLoad]>; @@ -288,6 +290,12 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +def imm64ZExt32 : Operand, ImmLeaf(Imm); +}]>; + // Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require // restricted memrix (4-aligned) constants are alignment sensitive. 
If these // offsets are hidden behind TOC entries than the values of the lower-order @@ -404,6 +412,14 @@ def crrc : RegisterOperand { let ParserMatchClass = PPCRegCRRCAsmOperand; } +def PPCU2ImmAsmOperand : AsmOperandClass { + let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; + let RenderMethod = "addImmOperands"; +} +def u2imm : Operand { + let PrintMethod = "printU2ImmOperand"; + let ParserMatchClass = PPCU2ImmAsmOperand; +} def PPCS5ImmAsmOperand : AsmOperandClass { let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; let RenderMethod = "addImmOperands"; @@ -411,6 +427,7 @@ def PPCS5ImmAsmOperand : AsmOperandClass { def s5imm : Operand { let PrintMethod = "printS5ImmOperand"; let ParserMatchClass = PPCS5ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<5>"; } def PPCU5ImmAsmOperand : AsmOperandClass { let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; @@ -419,6 +436,7 @@ def PPCU5ImmAsmOperand : AsmOperandClass { def u5imm : Operand { let PrintMethod = "printU5ImmOperand"; let ParserMatchClass = PPCU5ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<5>"; } def PPCU6ImmAsmOperand : AsmOperandClass { let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; @@ -427,6 +445,7 @@ def PPCU6ImmAsmOperand : AsmOperandClass { def u6imm : Operand { let PrintMethod = "printU6ImmOperand"; let ParserMatchClass = PPCU6ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<6>"; } def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; @@ -436,6 +455,7 @@ def s16imm : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def PPCU16ImmAsmOperand : AsmOperandClass { let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; @@ -445,6 +465,7 @@ def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let 
DecoderMethod = "decodeUImmOperand<16>"; } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; @@ -457,6 +478,7 @@ def s17imm : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; @@ -502,6 +524,7 @@ def PPCCRBitMaskOperand : AsmOperandClass { def crbitm: Operand { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let DecoderMethod = "decodeCRBitMOperand"; let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands @@ -539,6 +562,7 @@ def memri : Operand { let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; + let DecoderMethod = "decodeMemRIOperands"; } def memrr : Operand { let PrintMethod = "printMemRegReg"; @@ -548,6 +572,7 @@ def memrix : Operand { // memri where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; + let DecoderMethod = "decodeMemRIXOperands"; } // A single-register address. This is used with the SjLj @@ -555,6 +580,14 @@ def memrix : Operand { // memri where the imm is 4-aligned. def memr : Operand { let MIOperandInfo = (ops ptr_rc:$ptrreg); } +def PPCTLSRegOperand : AsmOperandClass { + let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; + let RenderMethod = "addTLSRegOperands"; +} +def tlsreg32 : Operand { + let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; +} // PowerPC Predicate operand. 
def pred : Operand { @@ -580,6 +613,7 @@ def iaddroff : ComplexPattern; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -613,20 +647,6 @@ multiclass XForm_6rc opcode, bits<10> xo, dag OOL, dag IOL, } } -multiclass XForm_10r opcode, bits<10> xo, dag OOL, dag IOL, - string asmbase, string asmstr, InstrItinClass itin, - list pattern> { - let BaseName = asmbase in { - def NAME : XForm_10, RecFormRel; - let Defs = [CR0] in - def o : XForm_10, isDOT, RecFormRel; - } -} - multiclass XForm_10rc opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { @@ -887,30 +907,63 @@ let usesCustomInserter = 1, // Expanded after instruction selection. def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. 
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond, + gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", + [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; + def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", + [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; + def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, + f4rc:$T, f4rc:$F), "#SELECT_F4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; + def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, + f8rc:$T, f8rc:$F), "#SELECT_F8", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + vrrc:$T, vrrc:$F), "#SELECT_VRRC", + [(set v4i32:$dst, + (select i1:$cond, v4i32:$T, v4i32:$F))]>; } // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -let mayStore = 1 in +let mayStore = 1 in { def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; +def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F), + "#SPILL_CRBIT", []>; +} // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. 
-let mayLoad = 1 in +let mayLoad = 1 in { def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; +def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F), + "#RESTORE_CRBIT", []>; +} let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { - def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>; - let isCodeGenOnly = 1 in - def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>; + let isCodeGenOnly = 1 in { + def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>; + def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>; + } } } @@ -921,10 +974,10 @@ let Defs = [LR] in let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, [(br bb:$dst)]>; def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), - "ba $dst", BrB, []>; + "ba $dst", IIC_BrB, []>; } // BCC represents an arbitrary conditional branch on a predicate. 
@@ -938,23 +991,39 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; let isReturn = 1, Uses = [LR, RM] in - def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), - "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>; + def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; + } + + let isCodeGenOnly = 1 in { + let Pattern = [(brcond i1:$bi, bb:$dst)] in + def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 12, $bi, $dst">; + + let Pattern = [(brcond (not i1:$bi), bb:$dst)] in + def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 4, $bi, $dst">; + + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), + "bclr 12, $bi, 0", IIC_BrB, []>; + def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), + "bclr 4, $bi, 0", IIC_BrB, []>; } let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), - "bdzlr+", BrB, []>; + "bdzlr+", IIC_BrB, []>; def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), - "bdnzlr+", BrB, []>; + "bdnzlr+", IIC_BrB, []>; def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), - "bdzlr-", BrB, []>; + "bdzlr-", IIC_BrB, []>; def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), - "bdnzlr-", BrB, []>; + "bdnzlr-", IIC_BrB, []>; } let Defs = [CTR], Uses = [CTR] in { @@ -997,33 +1066,54 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. 
+ "bl $func", IIC_BrB, []>; // See Pat patterns below. def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; + + def BCL : BForm_4<16, 12, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 12, $bi, $dst">; + def BCLn : BForm_4<16, 4, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 4, $bi, $dst">; } } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>; + let isCodeGenOnly = 1 in { + def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>; + def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>; + } } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), - "blrl", BrB, []>; + "blrl", IIC_BrB, []>; + + let isCodeGenOnly = 1 in { + def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), + "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; - let isCodeGenOnly = 1 in - def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), - "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>; + def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi), + "bclrl 12, $bi, 0", IIC_BrB, []>; + def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi), + 
"bclrl 4, $bi, 0", IIC_BrB, []>; + } } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), @@ -1053,17 +1143,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), - "bdzlrl", BrB, []>; + "bdzlrl", IIC_BrB, []>; def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), - "bdnzlrl", BrB, []>; + "bdnzlrl", IIC_BrB, []>; def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), - "bdzlrl+", BrB, []>; + "bdzlrl+", IIC_BrB, []>; def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), - "bdnzlrl+", BrB, []>; + "bdnzlrl+", IIC_BrB, []>; def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), - "bdzlrl-", BrB, []>; + "bdzlrl-", IIC_BrB, []>; def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), - "bdnzlrl-", BrB, []>; + "bdnzlrl-", IIC_BrB, []>; } } @@ -1089,19 +1179,19 @@ let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in -def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In32BitMode]>; +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In32BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; } @@ -1127,33 +1217,33 @@ let isBranch = 1, isTerminator = 1 in { // System call. 
let PPC970_Unit = 7 in { def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), - "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; + "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>; } // DCB* instructions. -def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), - "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, +def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", + IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), - "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, +def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst", + IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), - "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, +def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst", + IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), - "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, +def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst", + IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), - "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, +def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst", + IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), - "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, +def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst", + IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), - "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, +def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst", + IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZL : DCB_Form<1014, 1, (outs), (ins 
memrr:$dst), - "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, +def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", + IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), @@ -1241,26 +1331,26 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src", LdStLWARX, + "lwarx $rD, $src", IIC_LdStLWARX, [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", LdStSTWCX, + "stwcx. $rS, $dst", IIC_LdStSTWCX, [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), - "twi $to, $rA, $imm", IntTrapW, []>; + "twi $to, $rA, $imm", IIC_IntTrapW, []>; def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), - "tw $to, $rA, $rB", IntTrapW, []>; + "tw $to, $rA, $rB", IIC_IntTrapW, []>; def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), - "tdi $to, $rA, $imm", IntTrapD, []>; + "tdi $to, $rA, $imm", IIC_IntTrapD, []>; def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), - "td $to, $rA, $rB", IntTrapD, []>; + "td $to, $rA, $rB", IIC_IntTrapD, []>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -1269,56 +1359,56 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), // Unindexed (r+i) Loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFD, + "lfs $rD, $src", IIC_LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), - "lfd $rD, $src", LdStLFD, + "lfd $rD, $src", IIC_LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfsu $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfdu $rD, $addr", LdStLFDU, + "lfdu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -1326,37 +1416,37 @@ def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr // Indexed (r+r) Loads with Update (preinc). 
def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLFDU, + "lfsux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLFDU, + "lfdux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -1366,45 +1456,45 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, 
+ "lhzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStLoad, + "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStLoad, + "lwbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFD, + "lfsx $frD, $src", IIC_LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFD, + "lfdx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src), - "lfiwax $frD, $src", LdStLFD, + "lfiwax $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), - "lfiwzx $frD, $src", LdStLFD, + "lfiwzx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } // Load Multiple def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), - "lmw $rD, $src", LdStLMW, []>; + "lmw $rD, $src", IIC_LdStLMW, []>; //===----------------------------------------------------------------------===// // PPC32 Store Instructions. @@ -1413,38 +1503,38 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), // Unindexed (r+i) Stores. 
let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), - "stfs $rS, $dst", LdStSTFD, + "stfs $rS, $dst", IIC_LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), - "stfd $rS, $dst", LdStSTFD, + "stfd $rS, $dst", IIC_LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). let PPC970_Unit = 2, mayStore = 1 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), - "stfsu $rS, $dst", LdStSTFDU, []>, + "stfsu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), - "stfdu $rS, $dst", LdStSTFDU, []>, + "stfdu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } @@ 
-1465,59 +1555,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), // Indexed (r+r) Stores. let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStStore, + "sthbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStStore, + "stwbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStSTFD, + "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStSTFD, + "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStSTFD, + "stfdx $frS, $dst", IIC_LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), - "stfsux $rS, $dst", LdStSTFDU, []>, + "stfsux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), - "stfdux $rS, $dst", LdStSTFDU, []>, + "stfdux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } @@ -1538,11 +1628,20 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // Store Multiple def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), - "stmw $rS, $dst", LdStLMW, []>; + "stmw $rS, $dst", IIC_LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), - "sync $L", LdStSync, []>; -def : Pat<(int_ppc_sync), (SYNC 0)>; + "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>; + +let isCodeGenOnly = 1 in { + def MSYNC : XForm_24_sync<31, 598, (outs), (ins), + "msync", IIC_LdStSync, []>, Requires<[IsBookE]> { + let L = 0; + } +} + +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>; 
//===----------------------------------------------------------------------===// // PPC32 Arithmetic Instructions. @@ -1550,41 +1649,41 @@ def : Pat<(int_ppc_sync), (SYNC 0)>; let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic. $rD, $rA, $imm", IntGeneral, + "addic. $rD, $rA, $imm", IIC_IntGeneral, []>, isDOT, RecFormRel; } def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), - "la $rD, $sym($rA)", IntGeneral, + "la $rD, $sym($rA)", IIC_IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i32:$rD, 
imm32SExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } @@ -1592,154 +1691,170 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CR0] in { def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, + "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, + "andis. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; -def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, + +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>; +let isCodeGenOnly = 1 in { +// The POWER6 and POWER7 have special group-terminating nops. 
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), + "ori 1, 1, 0", IIC_IntSimple, []>; +def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), + "ori 2, 2, 0", IIC_IntSimple, []>; +} + let isCompare = 1, neverHasSideEffects = 1 in { def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), - "cmpwi $crD, $rA, $imm", IntCompare>; + "cmpwi $crD, $rA, $imm", IIC_IntCompare>; def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), - "cmplwi $dst, $src1, $src2", IntCompare>; + "cmplwi $dst, $src1, $src2", IIC_IntCompare>; } } let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. +let isCommutable = 1 in { defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, i32:$rB))]>; +} // isCommutable defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, i32:$rB))]>; defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; +} // isCommutable defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins 
gprc:$rS, gprc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; +} // isCommutable defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "slw", "$rA, $rS, $rB", IntGeneral, + "slw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "srw", "$rA, $rS, $rB", IntGeneral, + "srw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "sraw", "$rA, $rS, $rB", IntShift, + "sraw", "$rA, $rS, $rB", IIC_IntShift, [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } let PPC970_Unit = 1 in { // FXU Operations. let neverHasSideEffects = 1 in { defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), - "srawi", "$rA, $rS, $SH", IntShift, + "srawi", "$rA, $rS, $SH", IIC_IntShift, [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), - "cntlzw", "$rA, $rS", IntGeneral, + "cntlzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctlz i32:$rS))]>; defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; } let isCompare = 1, neverHasSideEffects = 1 in { def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmpw $crD, $rA, $rB", IntCompare>; + "cmpw $crD, $rA, $rB", IIC_IntCompare>; def CMPLW : 
XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmplw $crD, $rA, $rB", IntCompare>; + "cmplw $crD, $rA, $rB", IIC_IntCompare>; } } let PPC970_Unit = 3 in { // FPU Operations. //def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), -// "fcmpo $crD, $fA, $fB", FPCompare>; +// "fcmpo $crD, $fA, $fB", IIC_FPCompare>; let isCompare = 1, neverHasSideEffects = 1 in { def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; } let Uses = [RM] in { let neverHasSideEffects = 1 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiw", "$frD, $frB", FPGeneral, + "fctiw", "$frD, $frB", IIC_FPGeneral, []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwz", "$frD, $frB", FPGeneral, + "fctiwz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), - "frsp", "$frD, $frB", FPGeneral, + "frsp", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (frnd f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (frnd f32:$frB))]>; } let neverHasSideEffects = 1 in { - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", 
"$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fceil f64:$frB))]>; defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fceil f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ftrunc f64:$frB))]>; defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ftrunc f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ffloor f64:$frB))]>; defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ffloor f32:$frB))]>; defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), - "fsqrt", "$frD, $frB", FPSqrt, + "fsqrt", "$frD, $frB", IIC_FPSqrtD, [(set f64:$frD, (fsqrt f64:$frB))]>; defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), - "fsqrts", "$frD, $frB", FPSqrt, + "fsqrts", "$frD, $frB", IIC_FPSqrtS, [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1751,54 +1866,54 @@ let Uses = [RM] in { /// sneak into a d-group with a store). let neverHasSideEffects = 1 in defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), - "fmr", "$frD, $frB", FPGeneral, + "fmr", "$frD, $frB", IIC_FPGeneral, []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. 
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fabs f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fabs f64:$frB))]>; defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg (fabs f32:$frB)))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg (fabs f64:$frB)))]>; defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; // Reciprocal estimates. 
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), - "fre", "$frD, $frB", FPGeneral, + "fre", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfre f64:$frB))]>; defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), - "fres", "$frD, $frB", FPGeneral, + "fres", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfre f32:$frB))]>; defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), - "frsqrte", "$frD, $frB", FPGeneral, + "frsqrte", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), - "frsqrtes", "$frD, $frB", FPGeneral, + "frsqrtes", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } @@ -1806,57 +1921,67 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), // let neverHasSideEffects = 1 in def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), - "mcrf $BF, $BFA", BrMCR>, + "mcrf $BF, $BFA", IIC_BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +let isCommutable = 1 in { def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crand $CRD, $CRA, $CRB", BrCR, []>; + "crand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>; def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnand $CRD, $CRA, $CRB", BrCR, []>; + "crnand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>; def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "cror $CRD, $CRA, $CRB", BrCR, []>; + "cror $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>; def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crxor $CRD, $CRA, $CRB", BrCR, []>; + "crxor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>; def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnor $CRD, $CRA, $CRB", BrCR, 
[]>; + "crnor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>; def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "creqv $CRD, $CRA, $CRB", BrCR, []>; + "creqv $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>; +} // isCommutable def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crandc $CRD, $CRA, $CRB", BrCR, []>; + "crandc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>; def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crorc $CRD, $CRA, $CRB", BrCR, []>; + "crorc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), - "creqv $dst, $dst, $dst", BrCR, - []>; + "creqv $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 1)]>; def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), - "crxor $dst, $dst, $dst", BrCR, - []>; + "crxor $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 0)]>; let Defs = [CR1EQ], CRD = 6 in { def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), - "creqv 6, 6, 6", BrCR, + "creqv 6, 6, 6", IIC_BrCR, [(PPCcr6set)]>; def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), - "crxor 6, 6, 6", BrCR, + "crxor 6, 6, 6", IIC_BrCR, [(PPCcr6unset)]>; } } @@ -1865,38 +1990,38 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), - "mfspr $RT, $SPR", SprMFSPR>; + "mfspr $RT, $SPR", IIC_SprMFSPR>; def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), - "mtspr $SPR, $RT", SprMTSPR>; + "mtspr $SPR, $RT", IIC_SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), - "mftb $RT, $SPR", SprMFTB>, Deprecated; + "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), - "mfctr $rT", SprMFSPR>, + 
"mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { let Pattern = [(int_ppc_mtctr i32:$rS)] in def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1905,19 +2030,19 @@ let isCodeGenOnly = 1 in { // like a GPR on the PPC970. As such, copies in and out have the same // performance characteristics as an OR instruction. 
def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, (outs VRSAVERC:$reg), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1935,20 +2060,20 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), let neverHasSideEffects = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -1962,18 +2087,18 @@ let usesCustomInserter = 1, Uses = [RM] in { // to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. 
let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, []>, + "mtfsb0 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, []>, + "mtfsb1 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), - "mtfsf $FM, $rT", IntMTFSB0, []>, + "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), - "mffs $rT", IntMFFS, + "mffs $rT", IIC_IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1981,59 +2106,68 @@ let Uses = [RM] in { let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit -// +let isCommutable = 1 in defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +let isCodeGenOnly = 1 in +def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>; +let isCommutable = 1 in defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; + defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IntDivW, + "divw", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IntDivW, + "divwu", "$rT, $rA, $rB", IIC_IntDivW, 
[(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; +let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhw", "$rT, $rA, $rB", IntMulHW, + "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhwu", "$rT, $rA, $rB", IntMulHWU, + "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mullw", "$rT, $rA, $rB", IntMulHW, + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +} // isCommutable defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY] in { +let isCommutable = 1 in defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), - "addme", "$rT, $rA", IntGeneral, + "addme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, -1))]>; defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, 0))]>; defm SUBFE : XOForm_1rc<31, 136, 0, (outs 
gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube -1, i32:$rA))]>; defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -2043,90 +2177,96 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), // let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. let Uses = [RM] in { +let isCommutable = 1 in { defm FMADD : AForm_1r<63, 29, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FMADDS : AForm_1r<59, 29, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; defm FMSUB : AForm_1r<63, 28, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; defm FMSUBS : AForm_1r<59, 28, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; defm FNMADD : AForm_1r<63, 31, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; defm FNMADDS : 
AForm_1r<59, 31, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; defm FNMSUB : AForm_1r<63, 30, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; defm FNMSUBS : AForm_1r<59, 30, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; +} // isCommutable } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
-let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FSELD : AForm_1r<63, 23, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FSELS : AForm_1r<63, 23, (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { + let isCommutable = 1 in { defm FADD : AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fadd", "$FRT, $FRA, $FRB", FPAddSub, + "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; defm FADDS : AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fadds", "$FRT, $FRA, $FRB", FPGeneral, + "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + } // isCommutable defm FDIV : AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fdiv", "$FRT, $FRA, $FRB", FPDivD, + "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; defm FDIVS : AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fdivs", "$FRT, $FRA, $FRB", FPDivS, + "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + let isCommutable = 1 in { defm FMUL : AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), - "fmul", "$FRT, $FRA, $FRC", FPFused, + "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; defm FMULS : AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), - "fmuls", "$FRT, $FRA, $FRC", FPGeneral, + "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + } // isCommutable defm FSUB : AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - 
"fsub", "$FRT, $FRA, $FRB", FPAddSub, + "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; defm FSUBS : AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fsubs", "$FRT, $FRA, $FRB", FPGeneral, + "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } @@ -2136,7 +2276,7 @@ let PPC970_Unit = 1 in { // FXU Operations. let isSelect = 1 in def ISEL : AForm_4<31, 15, (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } @@ -2147,24 +2287,24 @@ let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. defm RLWIMI : MForm_2r<20, (outs gprc:$rA), (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, - u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate, - []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, RecFormRel; let Defs = [CR0] in def RLWINMo : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } defm RLWNM : MForm_2r<23, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), - "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral, + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, []>; } } // neverHasSideEffects = 1 @@ -2178,8 +2318,10 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. 
-def NOT : Pat<(not i32:$in), - (NOR $in, $in)>; +def i32not : OutPatFrag<(ops node:$in), + (NOR $in, $in)>; +def : Pat<(not i32:$in), + (i32not $in)>; // ADD an arbitrary immediate. def : Pat<(add i32:$in, imm:$imm), @@ -2250,6 +2392,17 @@ def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), (ADDIS $in, tblockaddress:$g)>; +// Support for thread-local storage. +def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", + [(set i32:$rD, (PPCppc32GOT))]>; + +def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), + "#LDgotTprelL32", + [(set i32:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; +def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), + (ADD4TLS $in, tglobaltlsaddr:$g)>; + // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. @@ -2284,7 +2437,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>; +def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), @@ -2304,52 +2458,561 @@ def : Pat<(fcopysign f32:$frB, f64:$frA), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" +include "PPCInstrVSX.td" + +def crnot : OutPatFrag<(ops node:$in), + (CRNOR $in, $in)>; +def : Pat<(not i1:$in), + (crnot $in)>; + +// Patterns for arithmetic i1 operations. +def : Pat<(add i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(sub i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(mul i1:$a, i1:$b), + (CRAND $a, $b)>; + +// We're sometimes asked to materialize i1 -1, which is just 1 in this case +// (-1 is used to mean all bits set). 
+def : Pat<(i1 -1), (CRSET)>; + +// i1 extensions, implemented in terms of isel. +def : Pat<(i32 (zext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i32 (sext i1:$in)), + (SELECT_I4 $in, (LI -1), (LI 0))>; + +def : Pat<(i64 (zext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; +def : Pat<(i64 (sext i1:$in)), + (SELECT_I8 $in, (LI8 -1), (LI8 0))>; + +// FIXME: We should choose either a zext or a sext based on other constants +// already around. +def : Pat<(i32 (anyext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i64 (anyext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; + +// match setcc on i1 variables. +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), + (CREQV $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), + (CRXOR $s1, $s2)>; + +// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE, +// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for +// floating-point types. 
+ +// CRNotPat matches 'pattern', whose value is the logical negation of the CR +// bit computed by 'result': the bare pattern becomes (crnot result), +// (not pattern) becomes result itself, and the extension forms select with +// swapped operands instead of materializing the negation. +// NOTE(review): both template-argument lists in this section and the first +// pattern of CRNotPat were reconstructed from how the bodies and the +// instantiations use them; confirm against the upstream file. +multiclass CRNotPat<dag pattern, dag result> { + def : Pat<pattern, (crnot result)>; + def : Pat<(not pattern), result>; + + // We can also fold the crnot into an extension: + def : Pat<(i32 (zext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + def : Pat<(i32 (sext pattern)), + (SELECT_I4 result, (LI 0), (LI -1))>; + + // We can also fold the crnot into an extension: + def : Pat<(i64 (zext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; + def : Pat<(i64 (sext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 -1))>; + + // FIXME: We should choose either a zext or a sext based on other constants + // already around. + def : Pat<(i32 (anyext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + + def : Pat<(i64 (anyext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; +} + +// FIXME: Because of what seems like a bug in TableGen's type-inference code, +// we need to write imm:$imm in the output patterns below, not just $imm, or +// else the resulting matcher will not correctly add the immediate operand +// (making it a register operand instead). + +// extended SETCC. +// cc is the condition code handed to pfrag, which builds the matched i1 +// comparison; rfrag and rfrag8 are the output fragments applied to i32 and +// i64 operands respectively. +multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag, + OutPatFrag rfrag, OutPatFrag rfrag8> { + def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; + + def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; +} + +// Note that we do all inversions below with i(32|64)not, instead of using +// (xori x, 1) because on the A2 nor has single-cycle latency while xori +// has 2-cycle latency.
+ +// NOTE(review): the condition code and compared constant of each +// instantiation below were reconstructed from what its output fragments +// compute (zero test via count-leading-zeros, sign-bit extraction, etc.) +// after the leading template arguments were lost; confirm against the +// upstream file. + +// Comparisons against 0. +defm : ExtSetCCPat<SETEQ, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (CNTLZW $in), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (CNTLZD $in), 58, 63)> >; + +defm : ExtSetCCPat<SETNE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not (CNTLZD $in)), 58, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >; + +// Comparisons against -1. +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +// SETCC for i32.
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpw cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmplwi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i32:$s1, 
i32:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// SETCC for i64. +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpd cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmpldi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i64:$s1, 
imm64ZExt32:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// SETCC for f32. 
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// SETCC for f64. 
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// match select on i1 variables: +def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), + (CROR (CRAND $cond , $tval), + (CRAND (crnot $cond), $fval))>; + +// match selectcc on i1 variables: +// select (lhs == rhs), tval, fval is: +// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : 
Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), + (CROR (CRAND (CREQV $lhs, $rhs), $tval), + (CRAND (CRXOR $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), + (CROR (CRAND (CREQV $lhs, $rhs), $fval), + (CRAND (CRXOR $lhs, $rhs), $tval))>; + +// match selectcc on i1 variables with non-i1 output. +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), + (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), + (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), + (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + 
(SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), + (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, 
v4i32:$fval, SETEQ)), + (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), + (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; +let usesCustomInserter = 1 in { +def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_EQ_BIT", + [(set i1:$dst, (trunc (not i32:$in)))]>; +def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_GT_BIT", + [(set i1:$dst, (trunc i32:$in))]>; + +def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_EQ_BIT8", + [(set i1:$dst, (trunc (not i64:$in)))]>; +def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_GT_BIT8", + [(set i1:$dst, (trunc i64:$in))]>; +} + +def : Pat<(i1 (not (trunc i32:$in))), + (ANDIo_1_EQ_BIT $in)>; +def : Pat<(i1 (not (trunc i64:$in))), + (ANDIo_1_EQ_BIT8 $in)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only // def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), - "isync", SprISYNC, []>; + "isync", IIC_SprISYNC, []>; def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), - "icbi $src", LdStICBI, []>; + "icbi $src", IIC_LdStICBI, []>; def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), - "eieio", LdStLoad, []>; + "eieio", IIC_LdStLoad, []>; def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), - "wait $L", LdStLoad, []>; + "wait $L", IIC_LdStLoad, []>; def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsr $RS, $L", SprMTMSR>; + "mtmsr $RS, $L", IIC_SprMTMSR>; def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), - "mfmsr $RT", SprMFMSR, []>; + "mfmsr $RT", IIC_SprMFMSR, 
[]>; def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsrd $RS, $L", SprMTMSRD>; + "mtmsrd $RS, $L", IIC_SprMTMSRD>; def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), - "slbie $RB", SprSLBIE, []>; + "slbie $RB", IIC_SprSLBIE, []>; def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), - "slbmte $RS, $RB", SprSLBMTE, []>; + "slbmte $RS, $RB", IIC_SprSLBMTE, []>; def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), - "slbmfee $RT, $RB", SprSLBMFEE, []>; + "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>; -def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>; +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; def TLBSYNC : XForm_0<31, 566, (outs), (ins), - "tlbsync", SprTLBSYNC, []>; + "tlbsync", IIC_SprTLBSYNC, []>; def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), - "tlbiel $RB", SprTLBIEL, []>; + "tlbiel $RB", IIC_SprTLBIEL, []>; def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), - "tlbie $RB,$RS", SprTLBIE, []>; + "tlbie $RB,$RS", IIC_SprTLBIE, []>; //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases @@ -2373,10 +3036,10 @@ class PPCAsmPseudo def : InstAlias<"sc", (SC 0)>; -def : InstAlias<"sync", (SYNC 0)>; -def : InstAlias<"msync", (SYNC 0)>; -def : InstAlias<"lwsync", (SYNC 1)>; -def : InstAlias<"ptesync", (SYNC 2)>; +def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>; +def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>; +def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>; def : InstAlias<"wait", (WAIT 0)>; def : InstAlias<"waitrsv", (WAIT 1)>; @@ -2565,19 +3228,19 @@ let PPC970_Unit = 7 in { let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCLR : XLForm_2<19, 16, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclr $bo, $bi, $bh", BrB, []>; + "bclr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses 
= [CTR, LR, RM] in def gBCLRL : XLForm_2<19, 16, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclrl $bo, $bi, $bh", BrB, []>; + "bclrl $bo, $bi, $bh", IIC_BrB, []>; let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCCTR : XLForm_2<19, 528, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bcctr $bo, $bi, $bh", BrB, []>; + "bcctr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCCTRL : XLForm_2<19, 528, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bcctrl $bo, $bi, $bh", BrB, []>; + "bcctrl $bo, $bi, $bh", IIC_BrB, []>; } def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; @@ -2620,14 +3283,14 @@ multiclass BranchExtendedMnemonicPM { (BCCA bibo, CR0, abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lr"#pm#" $cc", - (BCLR bibo, crrc:$cc)>; + (BCCLR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lr"#pm, - (BCLR bibo, CR0)>; + (BCCLR bibo, CR0)>; def : InstAlias<"b"#name#"ctr"#pm#" $cc", - (BCCTR bibo, crrc:$cc)>; + (BCCCTR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctr"#pm, - (BCCTR bibo, CR0)>; + (BCCCTR bibo, CR0)>; def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; @@ -2640,14 +3303,14 @@ multiclass BranchExtendedMnemonicPM { (BCCLA bibo, CR0, abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lrl"#pm#" $cc", - (BCLRL bibo, crrc:$cc)>; + (BCCLRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lrl"#pm, - (BCLRL bibo, CR0)>; + (BCCLRL bibo, CR0)>; def : InstAlias<"b"#name#"ctrl"#pm#" $cc", - (BCCTRL bibo, crrc:$cc)>; + (BCCCTRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctrl"#pm, - (BCCTRL bibo, CR0)>; + (BCCCTRL bibo, CR0)>; } multiclass BranchExtendedMnemonic { defm : BranchExtendedMnemonicPM; @@ -2671,18 +3334,18 @@ def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmplwi $rA, $imm", 
(CMPLWI CR0, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>; multiclass TrapExtendedMnemonic { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td new file mode 100644 index 0000000..9cc919e --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -0,0 +1,816 @@ +//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the VSX extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +def PPCRegVSRCAsmOperand : AsmOperandClass { + let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsrc : RegisterOperand<VSRC> { + let ParserMatchClass = PPCRegVSRCAsmOperand; +} + +def PPCRegVSFRCAsmOperand : AsmOperandClass { + let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsfrc : RegisterOperand<VSFRC> { + let ParserMatchClass = PPCRegVSFRCAsmOperand; +} + +multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XX3Form_Rc<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>; + let Defs = [CR6] in + def o : XX3Form_Rc<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT; + } +} + +def HasVSX : Predicate<"PPCSubTarget.hasVSX()">; +let Predicates = [HasVSX] in { +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. +let neverHasSideEffects = 1 in { // VSX instructions don't have side effects. 
+let Uses = [RM] in { + + // Load indexed instructions + let mayLoad = 1, canFoldAsLoad = 1 in { + def LXSDX : XForm_1<31, 588, + (outs vsfrc:$XT), (ins memrr:$src), + "lxsdx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (load xoaddr:$src))]>; + + def LXVD2X : XForm_1<31, 844, + (outs vsrc:$XT), (ins memrr:$src), + "lxvd2x $XT, $src", IIC_LdStLFD, + [(set v2f64:$XT, (load xoaddr:$src))]>; + + def LXVDSX : XForm_1<31, 332, + (outs vsrc:$XT), (ins memrr:$src), + "lxvdsx $XT, $src", IIC_LdStLFD, []>; + + def LXVW4X : XForm_1<31, 780, + (outs vsrc:$XT), (ins memrr:$src), + "lxvw4x $XT, $src", IIC_LdStLFD, []>; + } + + // Store indexed instructions + let mayStore = 1 in { + def STXSDX : XX1Form<31, 716, + (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsdx $XT, $dst", IIC_LdStSTFD, + [(store f64:$XT, xoaddr:$dst)]>; + + def STXVD2X : XX1Form<31, 972, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvd2x $XT, $dst", IIC_LdStSTFD, + [(store v2f64:$XT, xoaddr:$dst)]>; + + def STXVW4X : XX1Form<31, 908, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvw4x $XT, $dst", IIC_LdStSTFD, []>; + } + + // Add/Mul Instructions + let isCommutable = 1 in { + def XSADDDP : XX3Form<60, 32, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsadddp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; + def XSMULDP : XX3Form<60, 48, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmuldp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; + + def XVADDDP : XX3Form<60, 96, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvadddp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; + + def XVADDSP : XX3Form<60, 64, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvaddsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; + + def XVMULDP : XX3Form<60, 112, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmuldp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; + + def XVMULSP : XX3Form<60, 
80, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmulsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; + } + + // Subtract Instructions + def XSSUBDP : XX3Form<60, 40, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xssubdp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; + + def XVSUBDP : XX3Form<60, 104, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubdp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; + def XVSUBSP : XX3Form<60, 72, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; + + // FMA Instructions + let BaseName = "XSMADDADP" in { + let isCommutable = 1 in + def XSMADDADP : XX3Form<60, 33, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMDP : XX3Form<60, 41, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBADP" in { + let isCommutable = 1 in + def XSMSUBADP : XX3Form<60, 49, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMDP : XX3Form<60, 57, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDADP" in { + let isCommutable = 1 in + def XSNMADDADP : XX3Form<60, 161, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, + 
[(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMDP : XX3Form<60, 169, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBADP" in { + let isCommutable = 1 in + def XSNMSUBADP : XX3Form<60, 177, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMDP : XX3Form<60, 185, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDADP" in { + let isCommutable = 1 in + def XVMADDADP : XX3Form<60, 97, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMDP : XX3Form<60, 105, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDASP" in { + let isCommutable = 1 in + def XVMADDASP : XX3Form<60, 65, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMSP : XX3Form<60, 73, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, 
NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBADP" in { + let isCommutable = 1 in + def XVMSUBADP : XX3Form<60, 113, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMDP : XX3Form<60, 121, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBASP" in { + let isCommutable = 1 in + def XVMSUBASP : XX3Form<60, 81, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMSP : XX3Form<60, 89, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDADP" in { + let isCommutable = 1 in + def XVNMADDADP : XX3Form<60, 225, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMDP : XX3Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDASP" in { + let isCommutable = 1 in + def XVNMADDASP : XX3Form<60, 193, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, + RegConstraint<"$XTi 
= $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMSP : XX3Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBADP" in { + let isCommutable = 1 in + def XVNMSUBADP : XX3Form<60, 241, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMDP : XX3Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBASP" in { + let isCommutable = 1 in + def XVNMSUBASP : XX3Form<60, 209, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMSP : XX3Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + // Division Instructions + def XSDIVDP : XX3Form<60, 56, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; + def XSSQRTDP : XX2Form<60, 75, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xssqrtdp $XT, $XB", IIC_FPSqrtD, + [(set f64:$XT, (fsqrt f64:$XB))]>; + + def XSREDP : XX2Form<60, 90, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsredp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfre f64:$XB))]>; + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + 
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + def XSTDIVDP : XX3Form_1<60, 61, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSTSQRTDP : XX2Form_1<60, 106, + (outs crrc:$crD), (ins vsfrc:$XB), + "xstsqrtdp $crD, $XB", IIC_FPCompare, []>; + + def XVDIVDP : XX3Form<60, 120, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; + def XVDIVSP : XX3Form<60, 88, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; + + def XVSQRTDP : XX2Form<60, 203, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtdp $XT, $XB", IIC_FPSqrtD, + [(set v2f64:$XT, (fsqrt v2f64:$XB))]>; + def XVSQRTSP : XX2Form<60, 139, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtsp $XT, $XB", IIC_FPSqrtS, + [(set v4f32:$XT, (fsqrt v4f32:$XB))]>; + + def XVTDIVDP : XX3Form_1<60, 125, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XVTDIVSP : XX3Form_1<60, 93, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; + + def XVTSQRTDP : XX2Form_1<60, 234, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; + def XVTSQRTSP : XX2Form_1<60, 170, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; + + def XVREDP : XX2Form<60, 218, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvredp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfre v2f64:$XB))]>; + def XVRESP : XX2Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvresp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCfre v4f32:$XB))]>; + + def XVRSQRTEDP : XX2Form<60, 202, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtedp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>; + def XVRSQRTESP : XX2Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtesp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, 
(PPCfrsqrte v4f32:$XB))]>; + + // Compare Instructions + def XSCMPODP : XX3Form_1<60, 43, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPEQSP : XX3Form_Rcr<60, 67, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGEDP : XX3Form_Rcr<60, 115, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGESP : XX3Form_Rcr<60, 83, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGTDP : XX3Form_Rcr<60, 107, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGTSP : XX3Form_Rcr<60, 75, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + + // Move Instructions + def XSABSDP : XX2Form<60, 345, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fabs f64:$XB))]>; + def XSNABSDP : XX2Form<60, 361, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fabs f64:$XB)))]>; + def XSNEGDP : XX2Form<60, 377, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnegdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg f64:$XB))]>; + def XSCPSGNDP : XX3Form<60, 176, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xscpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; + + def XVABSDP : XX2Form<60, 473, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fabs v2f64:$XB))]>; + + def XVABSSP : XX2Form<60, 409, + (outs vsrc:$XT), (ins vsrc:$XB), + 
"xvabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fabs v4f32:$XB))]>; + + def XVCPSGNDP : XX3Form<60, 240, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>; + def XVCPSGNSP : XX3Form<60, 208, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>; + + def XVNABSDP : XX2Form<60, 489, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>; + def XVNABSSP : XX2Form<60, 425, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>; + + def XVNEGDP : XX2Form<60, 505, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg v2f64:$XB))]>; + def XVNEGSP : XX2Form<60, 441, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg v4f32:$XB))]>; + + // Conversion Instructions + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + def XSCVDPSXDS : XX2Form<60, 344, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctidz f64:$XB))]>; + def XSCVDPSXWS : XX2Form<60, 88, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwz f64:$XB))]>; + def XSCVDPUXDS : XX2Form<60, 328, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiduz f64:$XB))]>; + def XSCVDPUXWS : XX2Form<60, 72, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwuz f64:$XB))]>; + def XSCVSPDP : XX2Form<60, 329, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvspdp $XT, $XB", IIC_VecFP, []>; + def XSCVSXDDP : XX2Form<60, 376, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvsxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfid 
f64:$XB))]>; + def XSCVUXDDP : XX2Form<60, 360, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvuxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfidu f64:$XB))]>; + + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + def XVCVDPSXDS : XX2Form<60, 472, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>; + def XVCVDPSXWS : XX2Form<60, 216, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxws $XT, $XB", IIC_VecFP, []>; + def XVCVDPUXDS : XX2Form<60, 456, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>; + def XVCVDPUXWS : XX2Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxws $XT, $XB", IIC_VecFP, []>; + + def XVCVSPDP : XX2Form<60, 457, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspdp $XT, $XB", IIC_VecFP, []>; + def XVCVSPSXDS : XX2Form<60, 408, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPSXWS : XX2Form<60, 152, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxws $XT, $XB", IIC_VecFP, []>; + def XVCVSPUXDS : XX2Form<60, 392, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPUXWS : XX2Form<60, 136, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxws $XT, $XB", IIC_VecFP, []>; + def XVCVSXDDP : XX2Form<60, 504, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>; + def XVCVSXDSP : XX2Form<60, 440, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxdsp $XT, $XB", IIC_VecFP, []>; + def XVCVSXWDP : XX2Form<60, 248, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwdp $XT, $XB", IIC_VecFP, []>; + def XVCVSXWSP : XX2Form<60, 184, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwsp $XT, $XB", IIC_VecFP, []>; + def XVCVUXDDP : XX2Form<60, 488, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>; + def 
XVCVUXDSP : XX2Form<60, 424, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxdsp $XT, $XB", IIC_VecFP, []>; + def XVCVUXWDP : XX2Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwdp $XT, $XB", IIC_VecFP, []>; + def XVCVUXWSP : XX2Form<60, 168, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwsp $XT, $XB", IIC_VecFP, []>; + + // Rounding Instructions + def XSRDPI : XX2Form<60, 73, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpi $XT, $XB", IIC_VecFP, + [(set f64:$XT, (frnd f64:$XB))]>; + def XSRDPIC : XX2Form<60, 107, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpic $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fnearbyint f64:$XB))]>; + def XSRDPIM : XX2Form<60, 121, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpim $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ffloor f64:$XB))]>; + def XSRDPIP : XX2Form<60, 105, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpip $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fceil f64:$XB))]>; + def XSRDPIZ : XX2Form<60, 89, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpiz $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ftrunc f64:$XB))]>; + + def XVRDPI : XX2Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpi $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (frnd v2f64:$XB))]>; + def XVRDPIC : XX2Form<60, 235, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpic $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; + def XVRDPIM : XX2Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpim $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ffloor v2f64:$XB))]>; + def XVRDPIP : XX2Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpip $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fceil v2f64:$XB))]>; + def XVRDPIZ : XX2Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpiz $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; + + def XVRSPI : XX2Form<60, 137, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspi $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (frnd v4f32:$XB))]>; + def XVRSPIC : XX2Form<60, 171, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspic $XT, 
$XB", IIC_VecFP, + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; + def XVRSPIM : XX2Form<60, 185, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspim $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ffloor v4f32:$XB))]>; + def XVRSPIP : XX2Form<60, 169, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspip $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fceil v4f32:$XB))]>; + def XVRSPIZ : XX2Form<60, 153, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspiz $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ftrunc v4f32:$XB))]>; + + // Max/Min Instructions + let isCommutable = 1 in { + def XSMAXDP : XX3Form<60, 160, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + def XSMINDP : XX3Form<60, 168, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmindp $XT, $XA, $XB", IIC_VecFP, []>; + + def XVMAXDP : XX3Form<60, 224, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + def XVMINDP : XX3Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmindp $XT, $XA, $XB", IIC_VecFP, []>; + + def XVMAXSP : XX3Form<60, 192, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>; + def XVMINSP : XX3Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvminsp $XT, $XA, $XB", IIC_VecFP, []>; + } // isCommutable +} // Uses = [RM] + + // Logical Instructions + let isCommutable = 1 in + def XXLAND : XX3Form<60, 130, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxland $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>; + def XXLANDC : XX3Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlandc $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, + (vnot_ppc v4i32:$XB)))]>; + let isCommutable = 1 in { + def XXLNOR : XX3Form<60, 162, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlnor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA, + v4i32:$XB)))]>; + def XXLOR : XX3Form<60, 146, + (outs vsrc:$XT), (ins 
vsrc:$XA, vsrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>; + let isCodeGenOnly = 1 in + def XXLORf: XX3Form<60, 146, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>; + def XXLXOR : XX3Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlxor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>; + } // isCommutable + + // Permutation Instructions + def XXMRGHW : XX3Form<60, 18, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>; + def XXMRGLW : XX3Form<60, 50, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>; + + def XXPERMDI : XX3Form_2<60, 10, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + def XXSEL : XX4Form<60, 3, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>; + + def XXSLDWI : XX3Form_2<60, 2, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), + "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; + def XXSPLTW : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; +} // neverHasSideEffects +} // AddedComplexity + +def : InstAlias<"xvmovdp $XT, $XB", + (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; +def : InstAlias<"xvmovsp $XT, $XB", + (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; + +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; +def : InstAlias<"xxmrghd $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; +def : InstAlias<"xxmrgld $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; + +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX 
patterns. +def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; + +def : Pat<(f64 (vector_extract v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; +def : Pat<(f64 (vector_extract v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; + +// Additional fnmsub patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), + (XSNMSUBADP $B, $C, $A)>; +def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), + (XSNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; + +def : Pat<(v2f64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2i64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2i64:$A)), + 
(COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +// sign extension patterns +// To extend "in place" from v2i32 to v2i64, we have input data like: +// | undef | i32 | undef | i32 | +// but xvcvsxwdp expects the input in big-Endian format: +// | i32 | undef | i32 | undef | +// so we need to shift everything to the left by one i32 (word) before +// the conversion. +def : Pat<(sext_inreg v2i64:$C, v2i32), + (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>; +def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), + (XVCVSXWDP (XXSLDWI $C, $C, 1))>; + +} // AddedComplexity +} // HasVSX + diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 5e3a48d..227919c 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -214,6 +214,10 @@ asm( ".text\n" ".align 2\n" ".globl PPC64CompilationCallback\n" +#if _CALL_ELF == 2 + ".type PPC64CompilationCallback,@function\n" +"PPC64CompilationCallback:\n" +#else ".section \".opd\",\"aw\",@progbits\n" ".align 3\n" "PPC64CompilationCallback:\n" @@ -223,6 +227,7 @@ asm( ".align 4\n" ".type PPC64CompilationCallback,@function\n" ".L.PPC64CompilationCallback:\n" +#endif # else asm( ".text\n" diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f61c8bf..029bb8a 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -19,11 +19,14 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLowering.h" +#include 
"llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { @@ -32,35 +35,40 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ + const TargetMachine &TM = AP.TM; + Mangler *Mang = AP.Mang; + const DataLayout *DL = TM.getDataLayout(); MCContext &Ctx = AP.OutContext; SmallString<128> Name; + StringRef Suffix; + if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) + Suffix = "$stub"; + else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + Suffix = "$non_lazy_ptr"; + + if (!Suffix.empty()) + Name += DL->getPrivateGlobalPrefix(); + + unsigned PrefixLen = Name.size(); + if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Name += AP.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - } else { + Mang->getNameWithPrefix(Name, MO.getSymbolName()); + } else { const GlobalValue *GV = MO.getGlobal(); - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB || - (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)) - isImplicitlyPrivate = true; - - AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + TM.getNameWithPrefix(Name, GV, *Mang); } - + + unsigned OrigLen = Name.size() - PrefixLen; + + Name += Suffix; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); + // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. 
if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { - Name += "$stub"; - const char *PGP = AP.MAI->getPrivateGlobalPrefix(); - const char *Prefix = ""; - if (!Name.startswith(PGP)) { - // http://llvm.org/bugs/show_bug.cgi?id=15763 - // all stubs and lazy_ptrs should be local symbols, which need leading 'L' - Prefix = PGP; - } - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name)); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -72,10 +80,9 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ StubValueTy(AP.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { - Name.erase(Name.end()-5, Name.end()); StubSym = MachineModuleInfoImpl:: - StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false); } return Sym; } @@ -83,9 +90,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, // then add the suffix. 
if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { - Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - MachineModuleInfoMachO &MachO = getMachOMMI(AP); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -101,7 +105,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ return Sym; } - return Ctx.GetOrCreateSymbol(Name.str()); + return Sym; } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 19ccbfc..4ff282e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -230,12 +229,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::F8RCRegClassID: case PPC::F4RCRegClassID: case PPC::VRRCRegClassID: + case PPC::VFRCRegClassID: + case PPC::VSLRCRegClassID: + case PPC::VSHRCRegClassID: return 32 - DefaultSafety; + case PPC::VSRCRegClassID: + case PPC::VSFRCRegClassID: + return 64 - DefaultSafety; case PPC::CRRCRegClassID: return 8 - DefaultSafety; } } +const TargetRegisterClass* +PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const { + if (Subtarget.hasVSX()) { + // With VSX, we can inflate various sub-register classes to the full VSX + // register set. 
+ + if (RC == &PPC::F8RCRegClass) + return &PPC::VSFRCRegClass; + else if (RC == &PPC::VRRCRegClass) + return &PPC::VSRCRegClass; + } + + return TargetRegisterInfo::getLargestLegalSuperClass(RC); +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -452,6 +472,127 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + +void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_CRBIT , + // Get the instruction's basic block. 
+ MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL), + getCRFromCRBit(SrcReg)) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) + .addReg(getCRFromCRBit(SrcReg)); + + // If the saved register wasn't CR0LT, shift the bits left so that the bit to + // store is the first one. Mask all but that bit. + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + // rlwinm rA, rA, ShiftBits, 0, 0. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg)) + .addImm(0).addImm(0); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; = RESTORE_CRBIT + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? 
G8RC : GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CRBIT does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); + + unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO) + .addReg(getCRFromCRBit(DestReg)); + + unsigned ShiftBits = getEncodingValue(DestReg); + // rlwimi r11, r10, 32-ShiftBits, ..., ... + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO) + .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill) + .addImm(ShiftBits ? 32-ShiftBits : 0) + .addImm(ShiftBits).addImm(ShiftBits); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), + getCRFromCRBit(DestReg)) + .addReg(RegO, RegState::Kill) + // Make sure we have a use dependency all the way through this + // sequence of instructions. We can't have the other bits in the CR + // modified in between the mfocrf and the mtocrf. + .addReg(getCRFromCRBit(DestReg), RegState::Implicit); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. 
@@ -595,6 +736,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_CR) { lowerCRRestore(II, FrameIndex); return; + } else if (OpC == PPC::SPILL_CRBIT) { + lowerCRBitSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CRBIT) { + lowerCRBitRestore(II, FrameIndex); + return; } else if (OpC == PPC::SPILL_VRSAVE) { lowerVRSAVESpilling(II, FrameIndex); return; @@ -812,11 +959,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, .addFrameIndex(FrameIdx).addImm(Offset); } -void -PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const { - MachineInstr &MI = *I; - +void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { unsigned FIOperandNum = 0; while (!MI.getOperand(FIOperandNum).isFI()) { ++FIOperandNum; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index dd3bb40..c3e54b4 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -15,8 +15,8 @@ #ifndef POWERPC32_REGISTERINFO_H #define POWERPC32_REGISTERINFO_H -#include "llvm/ADT/DenseMap.h" #include "PPC.h" +#include "llvm/ADT/DenseMap.h" #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" @@ -40,6 +40,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + /// Code Generation virtual methods... 
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; @@ -69,6 +72,10 @@ public: unsigned FrameIndex) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; + void lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerVRSAVERestore(MachineBasicBlock::iterator II, @@ -85,8 +92,8 @@ public: void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const; - void resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const; + void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const; bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; // Debug information queries. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index d566e2c..e11f7d4 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>; def sub_eq : SubRegIndex<1, 2>; def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; +def sub_64 : SubRegIndex<64>; +def sub_128 : SubRegIndex<128>; } @@ -47,9 +49,36 @@ class FPR num, string n> : PPCReg { let HWEncoding{4-0} = num; } -// VR - One of the 32 128-bit vector registers -class VR num, string n> : PPCReg { +// VF - One of the 32 64-bit floating-point subregisters of the vector +// registers (used by VSX). 
+class VF num, string n> : PPCReg { let HWEncoding{4-0} = num; + let HWEncoding{5} = 1; +} + +// VR - One of the 32 128-bit vector registers +class VR : PPCReg { + let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; + let HWEncoding{5} = 0; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + +// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar +// floating-point registers. +class VSRL : PPCReg { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + +// VSRH - One of the 32 128-bit VSX registers that overlap with the vector +// registers. +class VSRH : PPCReg { + let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; + let HWEncoding{5} = 1; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_128]; } // CR - One of the 8 4-bit condition registers @@ -80,12 +109,27 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } +// Floating-point vector subregisters (for VSX) +foreach Index = 0-31 in { + def VF#Index : VF; +} + // Vector registers foreach Index = 0-31 in { - def V#Index : VR, + def V#Index : VR("VF"#Index), "v"#Index>, DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// VSX registers +foreach Index = 0-31 in { + def VSL#Index : VSRL("F"#Index), "vs"#Index>, + DwarfRegAlias("F"#Index)>; +} +foreach Index = 0-31 in { + def VSH#Index : VSRH("V"#Index), "vs" # !add(Index, 32)>, + DwarfRegAlias("V"#Index)>; +} + // The reprsentation of r0 when treated as the constant 0. def ZERO : GPR<0, "0">; def ZERO8 : GP8; @@ -204,17 +248,39 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; -def CRBITRC : RegisterClass<"PPC", [i32], 32, - (add CR0LT, CR0GT, CR0EQ, CR0UN, - CR1LT, CR1GT, CR1EQ, CR1UN, - CR2LT, CR2GT, CR2EQ, CR2UN, +// VSX register classes (the allocation order mirrors that of the corresponding +// subregister classes). 
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, + (add (sequence "VSL%u", 0, 13), + (sequence "VSL%u", 31, 14))>; +def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, + (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7, + VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14, + VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30, + VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23, + VSH22, VSH21, VSH20)>; +def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, + (add VSLRC, VSHRC)>; + +// Register classes for the 64-bit "scalar" VSX subregisters. +def VFRC : RegisterClass<"PPC", [f64], 64, + (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7, + VF8, VF9, VF10, VF11, VF12, VF13, VF14, + VF15, VF16, VF17, VF18, VF19, VF31, VF30, + VF29, VF28, VF27, VF26, VF25, VF24, VF23, + VF22, VF21, VF20)>; +def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; + +def CRBITRC : RegisterClass<"PPC", [i1], 32, + (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, CR4LT, CR4GT, CR4EQ, CR4UN, CR5LT, CR5GT, CR5EQ, CR5UN, CR6LT, CR6GT, CR6EQ, CR6UN, - CR7LT, CR7GT, CR7EQ, CR7UN)> -{ - let CopyCost = -1; + CR7LT, CR7GT, CR7EQ, CR7UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR0LT, CR0GT, CR0EQ, CR0UN)> { + let Size = 32; } def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 92ba69c..1221d41 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -8,114 +8,106 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across PowerPC chips sets -// -def BPU : FuncUnit; // Branch unit -def SLU : FuncUnit; // Store/load unit -def SRU : FuncUnit; // special register unit -def IU1 : FuncUnit; // integer unit 1 (simple) -def IU2 : FuncUnit; // integer unit 2 (complex) -def FPU1 : FuncUnit; // floating 
point unit 1 -def FPU2 : FuncUnit; // floating point unit 2 -def VPU : FuncUnit; // vector permutation unit -def VIU1 : FuncUnit; // vector integer unit 1 (simple) -def VIU2 : FuncUnit; // vector integer unit 2 (complex) -def VFPU : FuncUnit; // vector floating point unit - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // -def IntSimple : InstrItinClass; -def IntGeneral : InstrItinClass; -def IntCompare : InstrItinClass; -def IntDivD : InstrItinClass; -def IntDivW : InstrItinClass; -def IntMFFS : InstrItinClass; -def IntMFVSCR : InstrItinClass; -def IntMTFSB0 : InstrItinClass; -def IntMTSRD : InstrItinClass; -def IntMulHD : InstrItinClass; -def IntMulHW : InstrItinClass; -def IntMulHWU : InstrItinClass; -def IntMulLI : InstrItinClass; -def IntRFID : InstrItinClass; -def IntRotateD : InstrItinClass; -def IntRotateDI : InstrItinClass; -def IntRotate : InstrItinClass; -def IntShift : InstrItinClass; -def IntTrapD : InstrItinClass; -def IntTrapW : InstrItinClass; -def BrB : InstrItinClass; -def BrCR : InstrItinClass; -def BrMCR : InstrItinClass; -def BrMCRX : InstrItinClass; -def LdStDCBA : InstrItinClass; -def LdStDCBF : InstrItinClass; -def LdStDCBI : InstrItinClass; -def LdStLoad : InstrItinClass; -def LdStLoadUpd : InstrItinClass; -def LdStStore : InstrItinClass; -def LdStStoreUpd : InstrItinClass; -def LdStDSS : InstrItinClass; -def LdStICBI : InstrItinClass; -def LdStLD : InstrItinClass; -def LdStLDU : InstrItinClass; -def LdStLDARX : InstrItinClass; -def LdStLFD : InstrItinClass; -def LdStLFDU : InstrItinClass; -def LdStLHA : InstrItinClass; -def LdStLHAU : InstrItinClass; -def LdStLMW : InstrItinClass; -def LdStLVecX : InstrItinClass; -def LdStLWA : InstrItinClass; -def LdStLWARX : InstrItinClass; -def LdStSLBIA : InstrItinClass; -def LdStSLBIE : InstrItinClass; -def LdStSTD : InstrItinClass; -def LdStSTDCX : InstrItinClass; -def LdStSTDU : InstrItinClass; -def LdStSTFD : 
InstrItinClass; -def LdStSTFDU : InstrItinClass; -def LdStSTVEBX : InstrItinClass; -def LdStSTWCX : InstrItinClass; -def LdStSync : InstrItinClass; -def SprISYNC : InstrItinClass; -def SprMFSR : InstrItinClass; -def SprMTMSR : InstrItinClass; -def SprMTSR : InstrItinClass; -def SprTLBSYNC : InstrItinClass; -def SprMFCR : InstrItinClass; -def SprMFMSR : InstrItinClass; -def SprMFSPR : InstrItinClass; -def SprMFTB : InstrItinClass; -def SprMTSPR : InstrItinClass; -def SprMTSRIN : InstrItinClass; -def SprRFI : InstrItinClass; -def SprSC : InstrItinClass; -def FPGeneral : InstrItinClass; -def FPAddSub : InstrItinClass; -def FPCompare : InstrItinClass; -def FPDivD : InstrItinClass; -def FPDivS : InstrItinClass; -def FPFused : InstrItinClass; -def FPRes : InstrItinClass; -def FPSqrt : InstrItinClass; -def VecGeneral : InstrItinClass; -def VecFP : InstrItinClass; -def VecFPCompare : InstrItinClass; -def VecComplex : InstrItinClass; -def VecPerm : InstrItinClass; -def VecFPRound : InstrItinClass; -def VecVSL : InstrItinClass; -def VecVSR : InstrItinClass; -def SprMTMSRD : InstrItinClass; -def SprSLIE : InstrItinClass; -def SprSLBIE : InstrItinClass; -def SprSLBMTE : InstrItinClass; -def SprSLBMFEE : InstrItinClass; -def SprSLBIA : InstrItinClass; -def SprTLBIEL : InstrItinClass; -def SprTLBIE : InstrItinClass; +def IIC_IntSimple : InstrItinClass; +def IIC_IntGeneral : InstrItinClass; +def IIC_IntCompare : InstrItinClass; +def IIC_IntDivD : InstrItinClass; +def IIC_IntDivW : InstrItinClass; +def IIC_IntMFFS : InstrItinClass; +def IIC_IntMFVSCR : InstrItinClass; +def IIC_IntMTFSB0 : InstrItinClass; +def IIC_IntMTSRD : InstrItinClass; +def IIC_IntMulHD : InstrItinClass; +def IIC_IntMulHW : InstrItinClass; +def IIC_IntMulHWU : InstrItinClass; +def IIC_IntMulLI : InstrItinClass; +def IIC_IntRFID : InstrItinClass; +def IIC_IntRotateD : InstrItinClass; +def IIC_IntRotateDI : InstrItinClass; +def IIC_IntRotate : InstrItinClass; +def IIC_IntShift : InstrItinClass; +def IIC_IntTrapD 
: InstrItinClass; +def IIC_IntTrapW : InstrItinClass; +def IIC_BrB : InstrItinClass; +def IIC_BrCR : InstrItinClass; +def IIC_BrMCR : InstrItinClass; +def IIC_BrMCRX : InstrItinClass; +def IIC_LdStDCBA : InstrItinClass; +def IIC_LdStDCBF : InstrItinClass; +def IIC_LdStDCBI : InstrItinClass; +def IIC_LdStLoad : InstrItinClass; +def IIC_LdStLoadUpd : InstrItinClass; +def IIC_LdStLoadUpdX : InstrItinClass; +def IIC_LdStStore : InstrItinClass; +def IIC_LdStStoreUpd : InstrItinClass; +def IIC_LdStDSS : InstrItinClass; +def IIC_LdStICBI : InstrItinClass; +def IIC_LdStLD : InstrItinClass; +def IIC_LdStLDU : InstrItinClass; +def IIC_LdStLDUX : InstrItinClass; +def IIC_LdStLDARX : InstrItinClass; +def IIC_LdStLFD : InstrItinClass; +def IIC_LdStLFDU : InstrItinClass; +def IIC_LdStLFDUX : InstrItinClass; +def IIC_LdStLHA : InstrItinClass; +def IIC_LdStLHAU : InstrItinClass; +def IIC_LdStLHAUX : InstrItinClass; +def IIC_LdStLMW : InstrItinClass; +def IIC_LdStLVecX : InstrItinClass; +def IIC_LdStLWA : InstrItinClass; +def IIC_LdStLWARX : InstrItinClass; +def IIC_LdStSLBIA : InstrItinClass; +def IIC_LdStSLBIE : InstrItinClass; +def IIC_LdStSTD : InstrItinClass; +def IIC_LdStSTDCX : InstrItinClass; +def IIC_LdStSTDU : InstrItinClass; +def IIC_LdStSTDUX : InstrItinClass; +def IIC_LdStSTFD : InstrItinClass; +def IIC_LdStSTFDU : InstrItinClass; +def IIC_LdStSTVEBX : InstrItinClass; +def IIC_LdStSTWCX : InstrItinClass; +def IIC_LdStSync : InstrItinClass; +def IIC_SprISYNC : InstrItinClass; +def IIC_SprMFSR : InstrItinClass; +def IIC_SprMTMSR : InstrItinClass; +def IIC_SprMTSR : InstrItinClass; +def IIC_SprTLBSYNC : InstrItinClass; +def IIC_SprMFCR : InstrItinClass; +def IIC_SprMFCRF : InstrItinClass; +def IIC_SprMFMSR : InstrItinClass; +def IIC_SprMFSPR : InstrItinClass; +def IIC_SprMFTB : InstrItinClass; +def IIC_SprMTSPR : InstrItinClass; +def IIC_SprMTSRIN : InstrItinClass; +def IIC_SprRFI : InstrItinClass; +def IIC_SprSC : InstrItinClass; +def IIC_FPGeneral : InstrItinClass; +def 
IIC_FPAddSub : InstrItinClass; +def IIC_FPCompare : InstrItinClass; +def IIC_FPDivD : InstrItinClass; +def IIC_FPDivS : InstrItinClass; +def IIC_FPFused : InstrItinClass; +def IIC_FPRes : InstrItinClass; +def IIC_FPSqrtD : InstrItinClass; +def IIC_FPSqrtS : InstrItinClass; +def IIC_VecGeneral : InstrItinClass; +def IIC_VecFP : InstrItinClass; +def IIC_VecFPCompare : InstrItinClass; +def IIC_VecComplex : InstrItinClass; +def IIC_VecPerm : InstrItinClass; +def IIC_VecFPRound : InstrItinClass; +def IIC_VecVSL : InstrItinClass; +def IIC_VecVSR : InstrItinClass; +def IIC_SprMTMSRD : InstrItinClass; +def IIC_SprSLIE : InstrItinClass; +def IIC_SprSLBIE : InstrItinClass; +def IIC_SprSLBMTE : InstrItinClass; +def IIC_SprSLBMFEE : InstrItinClass; +def IIC_SprSLBIA : InstrItinClass; +def IIC_SprTLBIEL : InstrItinClass; +def IIC_SprTLBIE : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. @@ -125,6 +117,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleP7.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" @@ -136,392 +129,392 @@ include "PPCScheduleE5500.td" // // opcode itinerary class // ====== =============== -// add IntSimple -// addc IntGeneral -// adde IntGeneral -// addi IntSimple -// addic IntGeneral -// addic. IntGeneral -// addis IntSimple -// addme IntGeneral -// addze IntGeneral -// and IntSimple -// andc IntSimple -// andi. IntGeneral -// andis. 
IntGeneral -// b BrB -// bc BrB -// bcctr BrB -// bclr BrB -// cmp IntCompare -// cmpi IntCompare -// cmpl IntCompare -// cmpli IntCompare -// cntlzd IntRotateD -// cntlzw IntGeneral -// crand BrCR -// crandc BrCR -// creqv BrCR -// crnand BrCR -// crnor BrCR -// cror BrCR -// crorc BrCR -// crxor BrCR -// dcba LdStDCBA -// dcbf LdStDCBF -// dcbi LdStDCBI -// dcbst LdStDCBF -// dcbt LdStLoad -// dcbtst LdStLoad -// dcbz LdStDCBF -// divd IntDivD -// divdu IntDivD -// divw IntDivW -// divwu IntDivW -// dss LdStDSS -// dst LdStDSS -// dstst LdStDSS -// eciwx LdStLoad -// ecowx LdStLoad -// eieio LdStLoad -// eqv IntSimple -// extsb IntSimple -// extsh IntSimple -// extsw IntSimple -// fabs FPGeneral -// fadd FPAddSub -// fadds FPGeneral -// fcfid FPGeneral -// fcmpo FPCompare -// fcmpu FPCompare -// fctid FPGeneral -// fctidz FPGeneral -// fctiw FPGeneral -// fctiwz FPGeneral -// fdiv FPDivD -// fdivs FPDivS -// fmadd FPFused -// fmadds FPGeneral -// fmr FPGeneral -// fmsub FPFused -// fmsubs FPGeneral -// fmul FPFused -// fmuls FPGeneral -// fnabs FPGeneral -// fneg FPGeneral -// fnmadd FPFused -// fnmadds FPGeneral -// fnmsub FPFused -// fnmsubs FPGeneral -// fres FPRes -// frsp FPGeneral -// frsqrte FPGeneral -// fsel FPGeneral -// fsqrt FPSqrt -// fsqrts FPSqrt -// fsub FPAddSub -// fsubs FPGeneral -// icbi LdStICBI -// isync SprISYNC -// lbz LdStLoad -// lbzu LdStLoadUpd -// lbzux LdStLoadUpd -// lbzx LdStLoad -// ld LdStLD -// ldarx LdStLDARX -// ldu LdStLDU -// ldux LdStLDU -// ldx LdStLD -// lfd LdStLFD -// lfdu LdStLFDU -// lfdux LdStLFDU -// lfdx LdStLFD -// lfs LdStLFD -// lfsu LdStLFDU -// lfsux LdStLFDU -// lfsx LdStLFD -// lha LdStLHA -// lhau LdStLHAU -// lhaux LdStLHAU -// lhax LdStLHA -// lhbrx LdStLoad -// lhz LdStLoad -// lhzu LdStLoadUpd -// lhzux LdStLoadUpd -// lhzx LdStLoad -// lmw LdStLMW -// lswi LdStLMW -// lswx LdStLMW -// lvebx LdStLVecX -// lvehx LdStLVecX -// lvewx LdStLVecX -// lvsl LdStLVecX -// lvsr LdStLVecX -// lvx LdStLVecX -// 
lvxl LdStLVecX -// lwa LdStLWA -// lwarx LdStLWARX -// lwaux LdStLHAU -// lwax LdStLHA -// lwbrx LdStLoad -// lwz LdStLoad -// lwzu LdStLoadUpd -// lwzux LdStLoadUpd -// lwzx LdStLoad -// mcrf BrMCR -// mcrfs FPGeneral -// mcrxr BrMCRX -// mfcr SprMFCR -// mffs IntMFFS -// mfmsr SprMFMSR -// mfspr SprMFSPR -// mfsr SprMFSR -// mfsrin SprMFSR -// mftb SprMFTB -// mfvscr IntMFVSCR -// mtcrf BrMCRX -// mtfsb0 IntMTFSB0 -// mtfsb1 IntMTFSB0 -// mtfsf IntMTFSB0 -// mtfsfi IntMTFSB0 -// mtmsr SprMTMSR -// mtmsrd LdStLD -// mtspr SprMTSPR -// mtsr SprMTSR -// mtsrd IntMTSRD -// mtsrdin IntMTSRD -// mtsrin SprMTSRIN -// mtvscr IntMFVSCR -// mulhd IntMulHD -// mulhdu IntMulHD -// mulhw IntMulHW -// mulhwu IntMulHWU -// mulld IntMulHD -// mulli IntMulLI -// mullw IntMulHW -// nand IntSimple -// neg IntSimple -// nor IntSimple -// or IntSimple -// orc IntSimple -// ori IntSimple -// oris IntSimple -// rfi SprRFI -// rfid IntRFID -// rldcl IntRotateD -// rldcr IntRotateD -// rldic IntRotateDI -// rldicl IntRotateDI -// rldicr IntRotateDI -// rldimi IntRotateDI -// rlwimi IntRotate -// rlwinm IntGeneral -// rlwnm IntGeneral -// sc SprSC -// slbia LdStSLBIA -// slbie LdStSLBIE -// sld IntRotateD -// slw IntGeneral -// srad IntRotateD -// sradi IntRotateDI -// sraw IntShift -// srawi IntShift -// srd IntRotateD -// srw IntGeneral -// stb LdStStore -// stbu LdStStoreUpd -// stbux LdStStoreUpd -// stbx LdStStore -// std LdStSTD -// stdcx. LdStSTDCX -// stdu LdStSTDU -// stdux LdStSTDU -// stdx LdStSTD -// stfd LdStSTFD -// stfdu LdStSTFDU -// stfdux LdStSTFDU -// stfdx LdStSTFD -// stfiwx LdStSTFD -// stfs LdStSTFD -// stfsu LdStSTFDU -// stfsux LdStSTFDU -// stfsx LdStSTFD -// sth LdStStore -// sthbrx LdStStore -// sthu LdStStoreUpd -// sthux LdStStoreUpd -// sthx LdStStore -// stmw LdStLMW -// stswi LdStLMW -// stswx LdStLMW -// stvebx LdStSTVEBX -// stvehx LdStSTVEBX -// stvewx LdStSTVEBX -// stvx LdStSTVEBX -// stvxl LdStSTVEBX -// stw LdStStore -// stwbrx LdStStore -// stwcx. 
LdStSTWCX -// stwu LdStStoreUpd -// stwux LdStStoreUpd -// stwx LdStStore -// subf IntGeneral -// subfc IntGeneral -// subfe IntGeneral -// subfic IntGeneral -// subfme IntGeneral -// subfze IntGeneral -// sync LdStSync -// td IntTrapD -// tdi IntTrapD -// tlbia LdStSLBIA -// tlbie LdStDCBF -// tlbsync SprTLBSYNC -// tw IntTrapW -// twi IntTrapW -// vaddcuw VecGeneral -// vaddfp VecFP -// vaddsbs VecGeneral -// vaddshs VecGeneral -// vaddsws VecGeneral -// vaddubm VecGeneral -// vaddubs VecGeneral -// vadduhm VecGeneral -// vadduhs VecGeneral -// vadduwm VecGeneral -// vadduws VecGeneral -// vand VecGeneral -// vandc VecGeneral -// vavgsb VecGeneral -// vavgsh VecGeneral -// vavgsw VecGeneral -// vavgub VecGeneral -// vavguh VecGeneral -// vavguw VecGeneral -// vcfsx VecFP -// vcfux VecFP -// vcmpbfp VecFPCompare -// vcmpeqfp VecFPCompare -// vcmpequb VecGeneral -// vcmpequh VecGeneral -// vcmpequw VecGeneral -// vcmpgefp VecFPCompare -// vcmpgtfp VecFPCompare -// vcmpgtsb VecGeneral -// vcmpgtsh VecGeneral -// vcmpgtsw VecGeneral -// vcmpgtub VecGeneral -// vcmpgtuh VecGeneral -// vcmpgtuw VecGeneral -// vctsxs VecFP -// vctuxs VecFP -// vexptefp VecFP -// vlogefp VecFP -// vmaddfp VecFP -// vmaxfp VecFPCompare -// vmaxsb VecGeneral -// vmaxsh VecGeneral -// vmaxsw VecGeneral -// vmaxub VecGeneral -// vmaxuh VecGeneral -// vmaxuw VecGeneral -// vmhaddshs VecComplex -// vmhraddshs VecComplex -// vminfp VecFPCompare -// vminsb VecGeneral -// vminsh VecGeneral -// vminsw VecGeneral -// vminub VecGeneral -// vminuh VecGeneral -// vminuw VecGeneral -// vmladduhm VecComplex -// vmrghb VecPerm -// vmrghh VecPerm -// vmrghw VecPerm -// vmrglb VecPerm -// vmrglh VecPerm -// vmrglw VecPerm -// vmsubfp VecFP -// vmsummbm VecComplex -// vmsumshm VecComplex -// vmsumshs VecComplex -// vmsumubm VecComplex -// vmsumuhm VecComplex -// vmsumuhs VecComplex -// vmulesb VecComplex -// vmulesh VecComplex -// vmuleub VecComplex -// vmuleuh VecComplex -// vmulosb VecComplex -// vmulosh 
VecComplex -// vmuloub VecComplex -// vmulouh VecComplex -// vnor VecGeneral -// vor VecGeneral -// vperm VecPerm -// vpkpx VecPerm -// vpkshss VecPerm -// vpkshus VecPerm -// vpkswss VecPerm -// vpkswus VecPerm -// vpkuhum VecPerm -// vpkuhus VecPerm -// vpkuwum VecPerm -// vpkuwus VecPerm -// vrefp VecFPRound -// vrfim VecFPRound -// vrfin VecFPRound -// vrfip VecFPRound -// vrfiz VecFPRound -// vrlb VecGeneral -// vrlh VecGeneral -// vrlw VecGeneral -// vrsqrtefp VecFP -// vsel VecGeneral -// vsl VecVSL -// vslb VecGeneral -// vsldoi VecPerm -// vslh VecGeneral -// vslo VecPerm -// vslw VecGeneral -// vspltb VecPerm -// vsplth VecPerm -// vspltisb VecPerm -// vspltish VecPerm -// vspltisw VecPerm -// vspltw VecPerm -// vsr VecVSR -// vsrab VecGeneral -// vsrah VecGeneral -// vsraw VecGeneral -// vsrb VecGeneral -// vsrh VecGeneral -// vsro VecPerm -// vsrw VecGeneral -// vsubcuw VecGeneral -// vsubfp VecFP -// vsubsbs VecGeneral -// vsubshs VecGeneral -// vsubsws VecGeneral -// vsububm VecGeneral -// vsububs VecGeneral -// vsubuhm VecGeneral -// vsubuhs VecGeneral -// vsubuwm VecGeneral -// vsubuws VecGeneral -// vsum2sws VecComplex -// vsum4sbs VecComplex -// vsum4shs VecComplex -// vsum4ubs VecComplex -// vsumsws VecComplex -// vupkhpx VecPerm -// vupkhsb VecPerm -// vupkhsh VecPerm -// vupklpx VecPerm -// vupklsb VecPerm -// vupklsh VecPerm -// vxor VecGeneral -// xor IntSimple -// xori IntSimple -// xoris IntSimple +// add IIC_IntSimple +// addc IIC_IntGeneral +// adde IIC_IntGeneral +// addi IIC_IntSimple +// addic IIC_IntGeneral +// addic. IIC_IntGeneral +// addis IIC_IntSimple +// addme IIC_IntGeneral +// addze IIC_IntGeneral +// and IIC_IntSimple +// andc IIC_IntSimple +// andi. IIC_IntGeneral +// andis. 
IIC_IntGeneral +// b IIC_BrB +// bc IIC_BrB +// bcctr IIC_BrB +// bclr IIC_BrB +// cmp IIC_IntCompare +// cmpi IIC_IntCompare +// cmpl IIC_IntCompare +// cmpli IIC_IntCompare +// cntlzd IIC_IntRotateD +// cntlzw IIC_IntGeneral +// crand IIC_BrCR +// crandc IIC_BrCR +// creqv IIC_BrCR +// crnand IIC_BrCR +// crnor IIC_BrCR +// cror IIC_BrCR +// crorc IIC_BrCR +// crxor IIC_BrCR +// dcba IIC_LdStDCBA +// dcbf IIC_LdStDCBF +// dcbi IIC_LdStDCBI +// dcbst IIC_LdStDCBF +// dcbt IIC_LdStLoad +// dcbtst IIC_LdStLoad +// dcbz IIC_LdStDCBF +// divd IIC_IntDivD +// divdu IIC_IntDivD +// divw IIC_IntDivW +// divwu IIC_IntDivW +// dss IIC_LdStDSS +// dst IIC_LdStDSS +// dstst IIC_LdStDSS +// eciwx IIC_LdStLoad +// ecowx IIC_LdStLoad +// eieio IIC_LdStLoad +// eqv IIC_IntSimple +// extsb IIC_IntSimple +// extsh IIC_IntSimple +// extsw IIC_IntSimple +// fabs IIC_FPGeneral +// fadd IIC_FPAddSub +// fadds IIC_FPGeneral +// fcfid IIC_FPGeneral +// fcmpo IIC_FPCompare +// fcmpu IIC_FPCompare +// fctid IIC_FPGeneral +// fctidz IIC_FPGeneral +// fctiw IIC_FPGeneral +// fctiwz IIC_FPGeneral +// fdiv IIC_FPDivD +// fdivs IIC_FPDivS +// fmadd IIC_FPFused +// fmadds IIC_FPGeneral +// fmr IIC_FPGeneral +// fmsub IIC_FPFused +// fmsubs IIC_FPGeneral +// fmul IIC_FPFused +// fmuls IIC_FPGeneral +// fnabs IIC_FPGeneral +// fneg IIC_FPGeneral +// fnmadd IIC_FPFused +// fnmadds IIC_FPGeneral +// fnmsub IIC_FPFused +// fnmsubs IIC_FPGeneral +// fres IIC_FPRes +// frsp IIC_FPGeneral +// frsqrte IIC_FPGeneral +// fsel IIC_FPGeneral +// fsqrt IIC_FPSqrtD +// fsqrts IIC_FPSqrtS +// fsub IIC_FPAddSub +// fsubs IIC_FPGeneral +// icbi IIC_LdStICBI +// isync IIC_SprISYNC +// lbz IIC_LdStLoad +// lbzu IIC_LdStLoadUpd +// lbzux IIC_LdStLoadUpdX +// lbzx IIC_LdStLoad +// ld IIC_LdStLD +// ldarx IIC_LdStLDARX +// ldu IIC_LdStLDU +// ldux IIC_LdStLDUX +// ldx IIC_LdStLD +// lfd IIC_LdStLFD +// lfdu IIC_LdStLFDU +// lfdux IIC_LdStLFDUX +// lfdx IIC_LdStLFD +// lfs IIC_LdStLFD +// lfsu IIC_LdStLFDU +// lfsux 
IIC_LdStLFDUX +// lfsx IIC_LdStLFD +// lha IIC_LdStLHA +// lhau IIC_LdStLHAU +// lhaux IIC_LdStLHAUX +// lhax IIC_LdStLHA +// lhbrx IIC_LdStLoad +// lhz IIC_LdStLoad +// lhzu IIC_LdStLoadUpd +// lhzux IIC_LdStLoadUpdX +// lhzx IIC_LdStLoad +// lmw IIC_LdStLMW +// lswi IIC_LdStLMW +// lswx IIC_LdStLMW +// lvebx IIC_LdStLVecX +// lvehx IIC_LdStLVecX +// lvewx IIC_LdStLVecX +// lvsl IIC_LdStLVecX +// lvsr IIC_LdStLVecX +// lvx IIC_LdStLVecX +// lvxl IIC_LdStLVecX +// lwa IIC_LdStLWA +// lwarx IIC_LdStLWARX +// lwaux IIC_LdStLHAUX +// lwax IIC_LdStLHA +// lwbrx IIC_LdStLoad +// lwz IIC_LdStLoad +// lwzu IIC_LdStLoadUpd +// lwzux IIC_LdStLoadUpdX +// lwzx IIC_LdStLoad +// mcrf IIC_BrMCR +// mcrfs IIC_FPGeneral +// mcrxr IIC_BrMCRX +// mfcr IIC_SprMFCR +// mffs IIC_IntMFFS +// mfmsr IIC_SprMFMSR +// mfspr IIC_SprMFSPR +// mfsr IIC_SprMFSR +// mfsrin IIC_SprMFSR +// mftb IIC_SprMFTB +// mfvscr IIC_IntMFVSCR +// mtcrf IIC_BrMCRX +// mtfsb0 IIC_IntMTFSB0 +// mtfsb1 IIC_IntMTFSB0 +// mtfsf IIC_IntMTFSB0 +// mtfsfi IIC_IntMTFSB0 +// mtmsr IIC_SprMTMSR +// mtmsrd IIC_LdStLD +// mtspr IIC_SprMTSPR +// mtsr IIC_SprMTSR +// mtsrd IIC_IntMTSRD +// mtsrdin IIC_IntMTSRD +// mtsrin IIC_SprMTSRIN +// mtvscr IIC_IntMFVSCR +// mulhd IIC_IntMulHD +// mulhdu IIC_IntMulHD +// mulhw IIC_IntMulHW +// mulhwu IIC_IntMulHWU +// mulld IIC_IntMulHD +// mulli IIC_IntMulLI +// mullw IIC_IntMulHW +// nand IIC_IntSimple +// neg IIC_IntSimple +// nor IIC_IntSimple +// or IIC_IntSimple +// orc IIC_IntSimple +// ori IIC_IntSimple +// oris IIC_IntSimple +// rfi IIC_SprRFI +// rfid IIC_IntRFID +// rldcl IIC_IntRotateD +// rldcr IIC_IntRotateD +// rldic IIC_IntRotateDI +// rldicl IIC_IntRotateDI +// rldicr IIC_IntRotateDI +// rldimi IIC_IntRotateDI +// rlwimi IIC_IntRotate +// rlwinm IIC_IntGeneral +// rlwnm IIC_IntGeneral +// sc IIC_SprSC +// slbia IIC_LdStSLBIA +// slbie IIC_LdStSLBIE +// sld IIC_IntRotateD +// slw IIC_IntGeneral +// srad IIC_IntRotateD +// sradi IIC_IntRotateDI +// sraw IIC_IntShift +// 
srawi IIC_IntShift +// srd IIC_IntRotateD +// srw IIC_IntGeneral +// stb IIC_LdStStore +// stbu IIC_LdStStoreUpd +// stbux IIC_LdStStoreUpd +// stbx IIC_LdStStore +// std IIC_LdStSTD +// stdcx. IIC_LdStSTDCX +// stdu IIC_LdStSTDU +// stdux IIC_LdStSTDUX +// stdx IIC_LdStSTD +// stfd IIC_LdStSTFD +// stfdu IIC_LdStSTFDU +// stfdux IIC_LdStSTFDU +// stfdx IIC_LdStSTFD +// stfiwx IIC_LdStSTFD +// stfs IIC_LdStSTFD +// stfsu IIC_LdStSTFDU +// stfsux IIC_LdStSTFDU +// stfsx IIC_LdStSTFD +// sth IIC_LdStStore +// sthbrx IIC_LdStStore +// sthu IIC_LdStStoreUpd +// sthux IIC_LdStStoreUpd +// sthx IIC_LdStStore +// stmw IIC_LdStLMW +// stswi IIC_LdStLMW +// stswx IIC_LdStLMW +// stvebx IIC_LdStSTVEBX +// stvehx IIC_LdStSTVEBX +// stvewx IIC_LdStSTVEBX +// stvx IIC_LdStSTVEBX +// stvxl IIC_LdStSTVEBX +// stw IIC_LdStStore +// stwbrx IIC_LdStStore +// stwcx. IIC_LdStSTWCX +// stwu IIC_LdStStoreUpd +// stwux IIC_LdStStoreUpd +// stwx IIC_LdStStore +// subf IIC_IntGeneral +// subfc IIC_IntGeneral +// subfe IIC_IntGeneral +// subfic IIC_IntGeneral +// subfme IIC_IntGeneral +// subfze IIC_IntGeneral +// sync IIC_LdStSync +// td IIC_IntTrapD +// tdi IIC_IntTrapD +// tlbia IIC_LdStSLBIA +// tlbie IIC_LdStDCBF +// tlbsync IIC_SprTLBSYNC +// tw IIC_IntTrapW +// twi IIC_IntTrapW +// vaddcuw IIC_VecGeneral +// vaddfp IIC_VecFP +// vaddsbs IIC_VecGeneral +// vaddshs IIC_VecGeneral +// vaddsws IIC_VecGeneral +// vaddubm IIC_VecGeneral +// vaddubs IIC_VecGeneral +// vadduhm IIC_VecGeneral +// vadduhs IIC_VecGeneral +// vadduwm IIC_VecGeneral +// vadduws IIC_VecGeneral +// vand IIC_VecGeneral +// vandc IIC_VecGeneral +// vavgsb IIC_VecGeneral +// vavgsh IIC_VecGeneral +// vavgsw IIC_VecGeneral +// vavgub IIC_VecGeneral +// vavguh IIC_VecGeneral +// vavguw IIC_VecGeneral +// vcfsx IIC_VecFP +// vcfux IIC_VecFP +// vcmpbfp IIC_VecFPCompare +// vcmpeqfp IIC_VecFPCompare +// vcmpequb IIC_VecGeneral +// vcmpequh IIC_VecGeneral +// vcmpequw IIC_VecGeneral +// vcmpgefp IIC_VecFPCompare +// 
vcmpgtfp IIC_VecFPCompare +// vcmpgtsb IIC_VecGeneral +// vcmpgtsh IIC_VecGeneral +// vcmpgtsw IIC_VecGeneral +// vcmpgtub IIC_VecGeneral +// vcmpgtuh IIC_VecGeneral +// vcmpgtuw IIC_VecGeneral +// vctsxs IIC_VecFP +// vctuxs IIC_VecFP +// vexptefp IIC_VecFP +// vlogefp IIC_VecFP +// vmaddfp IIC_VecFP +// vmaxfp IIC_VecFPCompare +// vmaxsb IIC_VecGeneral +// vmaxsh IIC_VecGeneral +// vmaxsw IIC_VecGeneral +// vmaxub IIC_VecGeneral +// vmaxuh IIC_VecGeneral +// vmaxuw IIC_VecGeneral +// vmhaddshs IIC_VecComplex +// vmhraddshs IIC_VecComplex +// vminfp IIC_VecFPCompare +// vminsb IIC_VecGeneral +// vminsh IIC_VecGeneral +// vminsw IIC_VecGeneral +// vminub IIC_VecGeneral +// vminuh IIC_VecGeneral +// vminuw IIC_VecGeneral +// vmladduhm IIC_VecComplex +// vmrghb IIC_VecPerm +// vmrghh IIC_VecPerm +// vmrghw IIC_VecPerm +// vmrglb IIC_VecPerm +// vmrglh IIC_VecPerm +// vmrglw IIC_VecPerm +// vmsubfp IIC_VecFP +// vmsummbm IIC_VecComplex +// vmsumshm IIC_VecComplex +// vmsumshs IIC_VecComplex +// vmsumubm IIC_VecComplex +// vmsumuhm IIC_VecComplex +// vmsumuhs IIC_VecComplex +// vmulesb IIC_VecComplex +// vmulesh IIC_VecComplex +// vmuleub IIC_VecComplex +// vmuleuh IIC_VecComplex +// vmulosb IIC_VecComplex +// vmulosh IIC_VecComplex +// vmuloub IIC_VecComplex +// vmulouh IIC_VecComplex +// vnor IIC_VecGeneral +// vor IIC_VecGeneral +// vperm IIC_VecPerm +// vpkpx IIC_VecPerm +// vpkshss IIC_VecPerm +// vpkshus IIC_VecPerm +// vpkswss IIC_VecPerm +// vpkswus IIC_VecPerm +// vpkuhum IIC_VecPerm +// vpkuhus IIC_VecPerm +// vpkuwum IIC_VecPerm +// vpkuwus IIC_VecPerm +// vrefp IIC_VecFPRound +// vrfim IIC_VecFPRound +// vrfin IIC_VecFPRound +// vrfip IIC_VecFPRound +// vrfiz IIC_VecFPRound +// vrlb IIC_VecGeneral +// vrlh IIC_VecGeneral +// vrlw IIC_VecGeneral +// vrsqrtefp IIC_VecFP +// vsel IIC_VecGeneral +// vsl IIC_VecVSL +// vslb IIC_VecGeneral +// vsldoi IIC_VecPerm +// vslh IIC_VecGeneral +// vslo IIC_VecPerm +// vslw IIC_VecGeneral +// vspltb IIC_VecPerm +// vsplth 
IIC_VecPerm +// vspltisb IIC_VecPerm +// vspltish IIC_VecPerm +// vspltisw IIC_VecPerm +// vspltw IIC_VecPerm +// vsr IIC_VecVSR +// vsrab IIC_VecGeneral +// vsrah IIC_VecGeneral +// vsraw IIC_VecGeneral +// vsrb IIC_VecGeneral +// vsrh IIC_VecGeneral +// vsro IIC_VecPerm +// vsrw IIC_VecGeneral +// vsubcuw IIC_VecGeneral +// vsubfp IIC_VecFP +// vsubsbs IIC_VecGeneral +// vsubshs IIC_VecGeneral +// vsubsws IIC_VecGeneral +// vsububm IIC_VecGeneral +// vsububs IIC_VecGeneral +// vsubuhm IIC_VecGeneral +// vsubuhs IIC_VecGeneral +// vsubuwm IIC_VecGeneral +// vsubuws IIC_VecGeneral +// vsum2sws IIC_VecComplex +// vsum4sbs IIC_VecComplex +// vsum4shs IIC_VecComplex +// vsum4ubs IIC_VecComplex +// vsumsws IIC_VecComplex +// vupkhpx IIC_VecPerm +// vupkhsb IIC_VecPerm +// vupkhsh IIC_VecPerm +// vupklpx IIC_VecPerm +// vupklsb IIC_VecPerm +// vupklsh IIC_VecPerm +// vxor IIC_VecGeneral +// xor IIC_IntSimple +// xori IIC_IntSimple +// xoris IIC_IntSimple // diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 37b6eac..218fed2 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -26,43 +26,39 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC 440/450 chip sets // -def IFTH1 : FuncUnit; // Fetch unit 1 -def IFTH2 : FuncUnit; // Fetch unit 2 -def PDCD1 : FuncUnit; // Decode unit 1 -def PDCD2 : FuncUnit; // Decode unit 2 -def DISS1 : FuncUnit; // Issue unit 1 -def DISS2 : FuncUnit; // Issue unit 2 -def LRACC : FuncUnit; // Register access and dispatch for - // the simple integer (J-pipe) and - // load/store (L-pipe) pipelines -def IRACC : FuncUnit; // Register access and dispatch for - // the complex integer (I-pipe) pipeline -def FRACC : FuncUnit; // Register access and dispatch for - // the floating-point execution (F-pipe) pipeline -def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline -def IEXE2 : FuncUnit; // 
Execution stage 2 for the I pipeline -def IWB : FuncUnit; // Write-back unit for the I pipeline -def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline -def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline -def JWB : FuncUnit; // Write-back unit for the J pipeline -def AGEN : FuncUnit; // Address generation for the L pipeline -def CRD : FuncUnit; // D-cache access for the L pipeline -def LWB : FuncUnit; // Write-back unit for the L pipeline -def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline -def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline -def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline -def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline -def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline -def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline -def FWB : FuncUnit; // Write-back unit for the F pipeline +def P440_DISS1 : FuncUnit; // Issue unit 1 +def P440_DISS2 : FuncUnit; // Issue unit 2 +def P440_LRACC : FuncUnit; // Register access and dispatch for + // the simple integer (J-pipe) and + // load/store (L-pipe) pipelines +def P440_IRACC : FuncUnit; // Register access and dispatch for + // the complex integer (I-pipe) pipeline +def P440_FRACC : FuncUnit; // Register access and dispatch for + // the floating-point execution (F-pipe) pipeline +def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline +def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline +def P440_IWB : FuncUnit; // Write-back unit for the I pipeline +def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline +def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline +def P440_JWB : FuncUnit; // Write-back unit for the J pipeline +def P440_AGEN : FuncUnit; // Address generation for the L pipeline +def P440_CRD : FuncUnit; // D-cache access for the L pipeline +def P440_LWB : FuncUnit; // Write-back unit for the L pipeline +def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F 
pipeline +def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline +def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline +def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline +def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline +def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline +def P440_FWB : FuncUnit; // Write-back unit for the F pipeline -def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used - // to make sure that no lwarx/stwcx. - // instructions are issued while another - // lwarx/stwcx. is in the L pipe. +def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used + // to make sure that no lwarx/stwcx. + // instructions are issued while another + // lwarx/stwcx. is in the L pipe. -def GPR_Bypass : Bypass; // The bypass for general-purpose regs. -def FPR_Bypass : Bypass; // The bypass for floating-point regs. +def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs. +def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs. // Notes: // Instructions are held in the FRACC, LRACC and IRACC pipeline @@ -104,560 +100,500 @@ def FPR_Bypass : Bypass; // The bypass for floating-point regs. 
def PPC440Itineraries : ProcessorItineraries< - [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC, - IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, - FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], - [GPR_Bypass, FPR_Bypass], [ - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<33, [IWB]>], - [40, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, 
GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - 
InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, 
- InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, 
[LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<3, [AGEN], 1>, - InstrStage<2, [CRD], 1>, - InstrStage<1, [LWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC], 0>, - InstrStage<1, [LRACC], 0>, - InstrStage<1, [IRACC]>, - InstrStage<1, [FEXE1], 0>, - InstrStage<1, [AGEN], 0>, - InstrStage<1, [JEXE1], 0>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [FEXE2], 0>, - InstrStage<1, [CRD], 0>, - InstrStage<1, [JEXE2], 0>, - InstrStage<1, [IEXE2]>, - InstrStage<6, [FEXE3], 0>, - InstrStage<6, [LWB], 0>, - InstrStage<6, [JWB], 0>, - InstrStage<6, [IWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, 
PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [9, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - 
InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<25, [FWB]>], - [35, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<13, [FWB]>], - [23, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, 
[FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4], - [FPR_Bypass, FPR_Bypass]> + [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2, + P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD, + P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5, + P440_FEXE6, P440_FWB, P440_LWARX_Hold], + [P440_GPR_Bypass, P440_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<33, [P440_IWB]>], + [36, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, 
[P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + 
InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + 
InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + 
InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<3, [P440_AGEN], 1>, + InstrStage<2, [P440_CRD], 1>, + InstrStage<1, [P440_LWB]>]>, + InstrItinData, + InstrStage<1, [P440_FRACC], 0>, + InstrStage<1, [P440_LRACC], 0>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_FEXE1], 0>, + InstrStage<1, [P440_AGEN], 0>, + InstrStage<1, [P440_JEXE1], 0>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_FEXE2], 0>, + InstrStage<1, [P440_CRD], 0>, + InstrStage<1, [P440_JEXE2], 0>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<6, [P440_FEXE3], 0>, + InstrStage<6, [P440_LWB], 0>, + InstrStage<6, [P440_JWB], 0>, + InstrStage<6, [P440_IWB]>]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + 
InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [5, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, 
[P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<25, [P440_FWB]>], + [31, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<13, [P440_FWB]>], + [19, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + 
InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0], + [P440_FPR_Bypass, P440_FPR_Bypass]> ]>; + +// ===---------------------------------------------------------------------===// +// PPC440 machine model for scheduling and other instruction cost heuristics. + +def PPC440Model : SchedMachineModel { + let IssueWidth = 2; // 2 instructions are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPC440Itineraries; +} + diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 1612cd2..1447696 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -14,8 +14,8 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC A2 chip sets // -def XU : FuncUnit; // XU pipeline -def FU : FuncUnit; // FI pipeline +def A2_XU : FuncUnit; // A2_XU pipeline +def A2_FU : FuncUnit; // FI pipeline // // This file defines the itinerary class data for the PPC A2 processor. 
@@ -24,126 +24,140 @@ def FU : FuncUnit; // FI pipeline def PPCA2Itineraries : ProcessorItineraries< - [XU, FU], [], [ - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [39, 1, 1]>, - InstrItinData], - [71, 1, 1]>, - InstrItinData], - [5, 1, 1]>, - InstrItinData], - [5, 1, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1]>, - InstrItinData], - [2, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [5, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [6, 8, 1, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [16, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [7, 1, 1]>, - InstrItinData], - [7, 9, 1, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [6, 8, 1, 1]>, - InstrItinData], - [82, 1, 1]>, // L2 latency - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [82, 1, 1]>, // L2 latency - InstrItinData], - [82, 1, 1]>, // L2 latency - InstrItinData], - [6]>, - InstrItinData], - [16]>, - InstrItinData], - [16, 1]>, - InstrItinData], - [6, 1]>, - InstrItinData], - [4, 1]>, - InstrItinData], - [6, 1]>, - InstrItinData], - [4, 1]>, - InstrItinData], - [6, 1]>, - InstrItinData], - [16]>, - InstrItinData], - [16]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [6, 1, 1]>, - InstrItinData], - [5, 1, 1]>, - InstrItinData], - [72, 1, 1]>, - InstrItinData], - [59, 1, 1]>, - InstrItinData], - [69, 1, 1]>, - InstrItinData], - [6, 1, 1, 1]>, - InstrItinData], - [6, 1]> + [A2_XU, A2_FU], [], [ + InstrItinData], + [1, 0, 0]>, + 
InstrItinData], + [2, 0, 0]>, + InstrItinData], + [2, 0, 0]>, + InstrItinData], + [39, 0, 0]>, + InstrItinData], + [71, 0, 0]>, + InstrItinData], + [5, 0, 0]>, + InstrItinData], + [5, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [2, 0, 0]>, + InstrItinData], + [2, 0, 0]>, + InstrItinData], + [2, 0, 0]>, + InstrItinData], + [2, 0, 0]>, + InstrItinData], + [2, 0]>, + InstrItinData], + [2, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [1, 0, 0]>, + InstrItinData], + [5, 0, 0]>, + InstrItinData], + [1, 0, 0]>, + InstrItinData], + [1, 0, 0]>, + InstrItinData], + [1, 0, 0]>, + InstrItinData], + [1, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [6, 8, 0, 0]>, + InstrItinData], + [6, 8, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [0, 0, 0]>, + InstrItinData], + [2, 0, 0, 0]>, + InstrItinData], + [16, 0, 0]>, + InstrItinData], + [0, 0, 0]>, + InstrItinData], + [2, 0, 0, 0]>, + InstrItinData], + [7, 0, 0]>, + InstrItinData], + [7, 9, 0, 0]>, + InstrItinData], + [7, 9, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [6, 8, 0, 0]>, + InstrItinData], + [6, 8, 0, 0]>, + InstrItinData], + [82, 0, 0]>, // L2 latency + InstrItinData], + [0, 0, 0]>, + InstrItinData], + [2, 0, 0, 0]>, + InstrItinData], + [2, 0, 0, 0]>, + InstrItinData], + [82, 0, 0]>, // L2 latency + InstrItinData], + [82, 0, 0]>, // L2 latency + InstrItinData], + [6]>, + InstrItinData], + [16]>, + InstrItinData], + [16, 0]>, + InstrItinData], + [6, 0]>, + InstrItinData], + [1, 0]>, + InstrItinData], + [4, 0]>, + InstrItinData], + [6, 0]>, + InstrItinData], + [4, 0]>, + InstrItinData], + [6, 0]>, + InstrItinData], + [16]>, + InstrItinData], + [16]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [6, 0, 0]>, + InstrItinData], + [5, 0, 0]>, + InstrItinData], + [72, 0, 0]>, + InstrItinData], + [59, 0, 0]>, + InstrItinData], + [69, 0, 0]>, + InstrItinData], + [65, 0, 0]>, + InstrItinData], + [6, 0, 0, 0]>, + 
InstrItinData], + [6, 0]> ]>; // ===---------------------------------------------------------------------===// // A2 machine model for scheduling and other instruction cost heuristics. def PPCA2Model : SchedMachineModel { - let IssueWidth = 1; // 2 micro-ops are dispatched per cycle. + let IssueWidth = 1; // 1 instruction is dispatched per cycle. let MinLatency = -1; // OperandCycles are interpreted as MinLatency. let LoadLatency = 6; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td index c189b9e..dab89e3 100644 --- a/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -19,238 +19,285 @@ // * Decode & Dispatch // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue // queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). -def DIS0 : FuncUnit; // Dispatch stage - insn 1 -def DIS1 : FuncUnit; // Dispatch stage - insn 2 +def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1 +def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2 // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. // Some instructions can only execute in SFX0 but not SFX1. // The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is executed. 
-def SFX0 : FuncUnit; // Simple unit 0 -def SFX1 : FuncUnit; // Simple unit 1 -def BU : FuncUnit; // Branch unit -def CFX_DivBypass - : FuncUnit; // CFX divide bypass path -def CFX_0 : FuncUnit; // CFX pipeline -def LSU_0 : FuncUnit; // LSU pipeline -def FPU_0 : FuncUnit; // FPU pipeline +def E500_SFX0 : FuncUnit; // Simple unit 0 +def E500_SFX1 : FuncUnit; // Simple unit 1 +def E500_BU : FuncUnit; // Branch unit +def E500_CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def E500_CFX_0 : FuncUnit; // CFX pipeline +def E500_LSU_0 : FuncUnit; // LSU pipeline +def E500_FPU_0 : FuncUnit; // FPU pipeline -def CR_Bypass : Bypass; +def E500_GPR_Bypass : Bypass; +def E500_FPR_Bypass : Bypass; +def E500_CR_Bypass : Bypass; def PPCE500mcItineraries : ProcessorItineraries< - [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 1, 1], // Latency = 1 or 2 - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<14, [CFX_DivBypass]>], - [17, 1, 1], // Latency=4..35, Repeat= 4..35 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<8, [FPU_0]>], - [11], // Latency = 8 - [FPR_Bypass]>, - InstrItinData, - InstrStage<8, [FPU_0]>], - [11, 1, 1], // Latency = 8 - [NoBypass, NoBypass, NoBypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - 
InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0]>], - [5, 1], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [4, 1], // Latency = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [4, 1, 1], // Latency = 1 - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [4, 1], // Latency = 1 - [CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 
4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 1], // Latency = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [6, 1, 1], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [5, 1], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0]>], - [5, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData, - InstrStage<5, [SFX0]>], - [8, 1], - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0]>], - [4, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<4, [FPU_0]>], - [13, 1, 1], // Latency = 10, Repeat rate = 4 - 
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<68, [FPU_0]>], - [71, 1, 1], // Latency = 68, Repeat rate = 68 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<38, [FPU_0]>], - [41, 1, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<4, [FPU_0]>], - [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<38, [FPU_0]>], - [41, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass]> + [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass, + E500_CFX_0, E500_LSU_0, E500_FPU_0], + [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [E500_CR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0], 0>, + InstrStage<14, [E500_CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500_FPU_0]>], + [11], // Latency = 8 + [E500_FPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, 
E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, + E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + 
InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>]>, + 
InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_SFX0, E500_SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0]>], + [5, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0], 0>]>, + InstrItinData, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0]>], + [4, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_CR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<68, [E500_FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500_FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<4, 
[E500_FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass, + E500_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500_FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [E500_FPR_Bypass, E500_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td index 7a24d20..de097d9 100644 --- a/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -20,280 +20,344 @@ // * Decode & Dispatch // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue // queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). -// def DIS0 : FuncUnit; -// def DIS1 : FuncUnit; +def E5500_DIS0 : FuncUnit; +def E5500_DIS1 : FuncUnit; // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. // The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is being executed. 
-// def SFX0 : FuncUnit; // Simple unit 0 -// def SFX1 : FuncUnit; // Simple unit 1 -// def BU : FuncUnit; // Branch unit -// def CFX_DivBypass -// : FuncUnit; // CFX divide bypass path -// def CFX_0 : FuncUnit; // CFX pipeline stage 0 +def E5500_SFX0 : FuncUnit; // Simple unit 0 +def E5500_SFX1 : FuncUnit; // Simple unit 1 +def E5500_BU : FuncUnit; // Branch unit +def E5500_CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0 -def CFX_1 : FuncUnit; // CFX pipeline stage 1 +def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1 -// def LSU_0 : FuncUnit; // LSU pipeline -// def FPU_0 : FuncUnit; // FPU pipeline +def E5500_LSU_0 : FuncUnit; // LSU pipeline +def E5500_FPU_0 : FuncUnit; // FPU pipeline -// def CR_Bypass : Bypass; +def E5500_GPR_Bypass : Bypass; +def E5500_FPR_Bypass : Bypass; +def E5500_CR_Bypass : Bypass; def PPCE5500Itineraries : ProcessorItineraries< - [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, - LSU_0, FPU_0], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 1 or 2 - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<26, [CFX_DivBypass]>], - [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<16, [CFX_DivBypass]>], - [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11], // Latency = 7, Repeat rate = 1 - [FPR_Bypass]>, - InstrItinData, - InstrStage<7, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 7 - [NoBypass, NoBypass, NoBypass]>, - InstrItinData, 
- InstrStage<1, [CFX_0], 0>, - InstrStage<2, [CFX_1]>], - [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<1, [CFX_1]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<1, [CFX_1]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0], 0>, - InstrStage<2, [CFX_1]>], - [8, 2, 2], // Latency = 4 or 5, Repeat = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0]>], - [6, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, 
- InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - 
InstrStage<4, [LSU_0]>], - [8, 2], // Latency = r+3, Repeat rate = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>]>, - InstrItinData, - InstrStage<2, [CFX_0]>], - [6, 2], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData, - InstrStage<5, [CFX_0]>], - [9, 2], // Latency = 5, Repeat rate = 5 - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData, - InstrStage<4, [CFX_0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<31, [FPU_0]>], - [39, 2, 2], // Latency = 35, Repeat rate = 31 - [FPR_Bypass, FPR_Bypass, 
FPR_Bypass]>, - InstrItinData, - InstrStage<16, [FPU_0]>], - [24, 2, 2], // Latency = 20, Repeat rate = 16 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [12, 2], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass]> + [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU, + E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1, + E5500_LSU_0, E5500_FPU_0], + [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [E5500_CR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<26, [E5500_CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<16, [E5500_CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<7, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, 
[E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, + E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + 
[E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, 
E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<4, [E5500_LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, 
E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>]>, + InstrItinData, + InstrStage<2, [E5500_CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0], 0>]>, + InstrItinData, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E5500_SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<4, [E5500_CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_CR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<31, [E5500_FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + 
InstrStage<16, [E5500_FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass, + E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [E5500_FPR_Bypass, E5500_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 72a0a39..21efd8f 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -11,61 +11,70 @@ // //===----------------------------------------------------------------------===// +def G3_BPU : FuncUnit; // Branch unit +def G3_SLU : FuncUnit; // Store/load unit +def G3_SRU : FuncUnit; // special register unit +def G3_IU1 : FuncUnit; // integer unit 1 (simple) +def G3_IU2 : FuncUnit; // integer unit 2 (complex) +def G3_FPU1 : FuncUnit; // floating point unit 1 def G3Itineraries : ProcessorItineraries< - [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index fc9120d..340773e 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,71 +11,86 @@ // //===----------------------------------------------------------------------===// +def G4_BPU : FuncUnit; // Branch unit +def G4_SLU : FuncUnit; // Store/load unit +def G4_SRU : FuncUnit; // special register unit +def G4_IU1 : FuncUnit; // integer unit 1 (simple) +def G4_IU2 : FuncUnit; // integer unit 2 (complex) +def G4_FPU1 : FuncUnit; // floating point unit 1 +def G4_VPU : 
FuncUnit; // vector permutation unit +def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4_VFPU : FuncUnit; // vector floating point unit + def G4Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1, + G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + 
InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index a4e82ce..1d9f13f 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,78 +11,102 @@ // //===----------------------------------------------------------------------===// -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) +def G4P_BPU : FuncUnit; // Branch unit +def G4P_SLU : FuncUnit; // Store/load unit +def G4P_SRU : FuncUnit; // special register unit +def G4P_IU1 : FuncUnit; // integer unit 1 (simple) +def G4P_IU2 : FuncUnit; // integer unit 2 (complex) +def G4P_IU3 : FuncUnit; // integer unit 3 (simple) +def G4P_IU4 : FuncUnit; // integer unit 4 (simple) +def G4P_FPU1 : FuncUnit; // floating point unit 1 +def G4P_VPU : FuncUnit; // vector permutation unit +def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4P_VFPU : FuncUnit; // vector floating point 
unit def G4PlusItineraries : ProcessorItineraries< - [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1, + G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + 
InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index c64998d..a3b73ab 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,90 +11,110 @@ // //===----------------------------------------------------------------------===// +def G5_BPU : FuncUnit; // Branch unit +def G5_SLU : FuncUnit; // Store/load unit +def G5_SRU : FuncUnit; // special register unit +def G5_IU1 : FuncUnit; // integer unit 1 (simple) +def G5_IU2 : FuncUnit; // integer unit 2 (complex) +def G5_FPU1 : FuncUnit; // floating point unit 1 +def G5_FPU2 : FuncUnit; // floating point unit 2 +def G5_VPU : FuncUnit; // vector permutation unit +def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G5_VFPU : FuncUnit; // vector floating point unit + def G5Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, // needs work - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, // needs work - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2, + G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + 
InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; // ===---------------------------------------------------------------------===// -// e5500 machine model for scheduling and other instruction cost heuristics. +// G5 machine model for scheduling and other instruction cost heuristics. def G5Model : SchedMachineModel { let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle. diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td new file mode 100644 index 0000000..d3e4269 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleP7.td @@ -0,0 +1,385 @@ +//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the POWER7 processor. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// IBM POWER7 multicore server processor +// B. Sinharoy, et al. +// IBM J. Res. & Dev. (55) 3. May/June 2011. + +// Scheduling for the P7 involves tracking two types of resources: +// 1. The dispatch bundle slots +// 2. The functional unit resources + +// Dispatch units: +def P7_DU1 : FuncUnit; +def P7_DU2 : FuncUnit; +def P7_DU3 : FuncUnit; +def P7_DU4 : FuncUnit; +def P7_DU5 : FuncUnit; +def P7_DU6 : FuncUnit; + +def P7_LS1 : FuncUnit; // Load/Store pipeline 1 +def P7_LS2 : FuncUnit; // Load/Store pipeline 2 + +def P7_FX1 : FuncUnit; // FX pipeline 1 +def P7_FX2 : FuncUnit; // FX pipeline 2 + +// VS pipeline 1 (vector integer ops. always here) +def P7_VS1 : FuncUnit; // VS pipeline 1 +// VS pipeline 2 (128-bit stores and perms. here) +def P7_VS2 : FuncUnit; // VS pipeline 2 + +def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) +def P7_BRU : FuncUnit; // BR unit + +// Notes: +// Each LSU pipeline can also execute FX add and logical instructions. +// Each LSU pipeline can complete a load or store in one cycle. +// +// Each store is broken into two parts, AGEN goes to the LSU while a +// "data steering" op. goes to the FXU or VSU. +// +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). +// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles). +// +// Frequent FX ops. take only one cycle and results can be used again in the +// next cycle (there is a self-bypass). Getting results from the other FX +// pipeline takes an additional cycle. +// +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. 
+// Dispatch of an instruction to VS1 that uses four single prec. inputs +// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any +// floating point instruction. +// +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline +// (unlike on the POWER6). +// +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP +// share the same write-back, and have a 5-cycle latency difference, so the +// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP +// op. has been dispatched to VS1. +// +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. +// +// Instruction dispatch groups have (at most) four non-branch instructions, and +// two branches. Unlike on the POWER4/5, a branch does not automatically +// end the dispatch group, but a second branch must be the last in the group. + +def P7Itineraries : ProcessorItineraries< + [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6, + P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [ + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2, + P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + // FIXME: Add record-form itinerary data. 
+ InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<36, [P7_FX1, P7_FX2]>], + [36, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<68, [P7_FX1, P7_FX2]>], + [68, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_CRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + 
InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, 
[P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_CRU]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1]>, // mtcr + InstrItinData, + InstrStage<1, [P7_CRU]>], + [6, 1]>, + InstrItinData, + InstrStage<1, [P7_CRU]>], + [3, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1]>], + [4, 1]>, // mtctr + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [8, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [33, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [27, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [44, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [32, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [7, 1, 
1]>, + InstrItinData, + InstrStage<1, [P7_VS2]>], + [3, 1, 1]> +]>; + +// ===---------------------------------------------------------------------===// +// P7 machine model for scheduling and other instruction cost heuristics. + +def P7Model : SchedMachineModel { + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. + // Note that the dispatch bundle size is 6 (including + // branches), but the total internal issue bandwidth per + // cycle (from all queues) is 8. + + let MinLatency = 0; // Out-of-order dispatch. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 16; + + let Itineraries = P7Itineraries; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 7231ab1..b07abe4 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -31,12 +31,24 @@ using namespace llvm; PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) + const std::string &FS, bool is64Bit, + CodeGenOpt::Level OptLevel) : PPCGenSubtargetInfo(TT, CPU, FS) , IsPPC64(is64Bit) , TargetTriple(TT) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + + std::string FullFS = FS; + + // At -O2 and above, track CR bits as individual registers. 
+ if (OptLevel >= CodeGenOpt::Default) { + if (!FullFS.empty()) + FullFS = "+crbits," + FullFS; + else + FullFS = "+crbits"; + } + + resetSubtargetFeatures(CPU, FullFS); } /// SetJITMode - This is called to inform the subtarget info that we are @@ -73,8 +85,10 @@ void PPCSubtarget::initializeEnvironment() { HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; + UseCRBits = false; HasAltivec = false; HasQPX = false; + HasVSX = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -179,7 +193,7 @@ bool PPCSubtarget::enablePostRAScheduler( return OptLevel >= CodeGenOpt::Default; } -// Embedded cores need aggressive scheduling. +// Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { default: return false; @@ -187,6 +201,7 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: + case PPC::DIR_PWR7: return true; } } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index c863a6e..87e012e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -73,6 +73,7 @@ protected: bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; + bool UseCRBits; bool IsPPC64; bool HasAltivec; bool HasQPX; @@ -103,7 +104,8 @@ public: /// of the specified triple. /// PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); + const std::string &FS, bool is64Bit, + CodeGenOpt::Level OptLevel); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -126,22 +128,6 @@ public: /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - /// getDataLayoutString - Return the pointer size and type alignment - /// properties of this subtarget. 
- const char *getDataLayoutString() const { - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - if (isPPC64() && isSVR4ABI()) { - if (TargetTriple.getOS() == llvm::Triple::FreeBSD) - return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"; - else - return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; - } - - return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" - : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; - } - /// \brief Reset the features for the PowerPC target. virtual void resetSubtargetFeatures(const MachineFunction *MF); private: @@ -162,6 +148,10 @@ public: /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } + /// useCRBits - Return true if we should store and manipulate i1 values in + /// the individual condition register bits. + bool useCRBits() const { return UseCRBits; } + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. @@ -188,6 +178,7 @@ public: bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } + bool hasVSX() const { return HasVSX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } @@ -200,8 +191,6 @@ public: /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } - /// isBGP - True if this is a BG/P platform. - bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } /// isBGQ - True if this is a BG/Q platform. 
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 9acefe5..e7438f3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -26,6 +26,10 @@ static cl:: opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); +static cl::opt +VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", + cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); + extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(ThePPC32Target); @@ -33,6 +37,41 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine C(ThePPC64LETarget); } +/// Return the datalayout string of a subtarget. +static std::string getDataLayoutString(const PPCSubtarget &ST) { + const Triple &T = ST.getTargetTriple(); + + std::string Ret; + + // Most PPC* platforms are big endian, PPC64LE is little endian. + if (ST.isLittleEndian()) + Ret = "e"; + else + Ret = "E"; + + Ret += DataLayout::getManglingComponent(T); + + // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit + // pointers. + if (!ST.isPPC64() || T.getOS() == Triple::Lv2) + Ret += "-p:32:32"; + + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + if (ST.isPPC64() || ST.isSVR4ABI()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
+ if (ST.isPPC64()) + Ret += "-n32:64"; + else + Ret += "-n32"; + + return Ret; +} + PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -40,15 +79,11 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64Bit), - DL(Subtarget.getDataLayoutString()), InstrInfo(*this), + Subtarget(TT, CPU, FS, is64Bit, OL), + DL(getDataLayoutString(Subtarget)), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - - // The binutils for the BG/P are too old for CFI. - if (Subtarget.isBGP()) - setMCUseCFI(false); initAsmInfo(); } @@ -95,6 +130,7 @@ public: virtual bool addPreISel(); virtual bool addILPOpts(); virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); virtual bool addPreSched2(); virtual bool addPreEmitPass(); }; @@ -129,10 +165,26 @@ bool PPCPassConfig::addInstSelector() { addPass(createPPCCTRLoopsVerify()); #endif + if (getPPCSubtarget().hasVSX()) + addPass(createPPCVSXCopyPass()); + + return false; +} + +bool PPCPassConfig::addPreRegAlloc() { + if (getPPCSubtarget().hasVSX()) { + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + insertPass(VSXFMAMutateEarly ? 
&RegisterCoalescerID : &MachineSchedulerID, + &PPCVSXFMAMutateID); + } + return false; } bool PPCPassConfig::addPreSched2() { + if (getPPCSubtarget().hasVSX()) + addPass(createPPCVSXCopyCleanupPass()); + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp index ec1e606..2903cc1 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "PPCTargetObjectFile.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/Mangler.h" using namespace llvm; @@ -22,16 +22,9 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { InitializeELF(TM.Options.UseInitArray); } -const MCSection * PPC64LinuxTargetObjectFile:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - const MCSection *DefaultSection = - TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); - - if (DefaultSection != ReadOnlySection) - return DefaultSection; - +const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI // when we have a constant that contains global relocations. This is // necessary because of this ABI's handling of pointers to functions in @@ -46,14 +39,17 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // linker, so we must use DataRelROSection instead of ReadOnlySection. // For more information, see the description of ELIMINATE_COPY_RELOCS in // GNU ld. 
- const GlobalVariable *GVar = dyn_cast(GV); + if (Kind.isReadOnly()) { + const GlobalVariable *GVar = dyn_cast(GV); - if (GVar && GVar->isConstant() && - (GVar->getInitializer()->getRelocationInfo() == - Constant::GlobalRelocations)) - return DataRelROSection; + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + Kind = SectionKind::getReadOnlyWithRel(); + } - return DefaultSection; + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, + Mang, TM); } const MCExpr *PPC64LinuxTargetObjectFile:: diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h index 262c522..3e71bbc 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.h +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -20,14 +20,14 @@ namespace llvm { /// 64-bit PowerPC Linux. class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { - virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - virtual const MCSection * - SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const; + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override; /// \brief Describe a TLS variable address within debug info. 
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; + const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index e876be1..74b5f45 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -15,8 +15,10 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: + PPCTargetStreamer(MCStreamer &S); virtual ~PPCTargetStreamer(); virtual void emitTCEntry(const MCSymbol &S) = 0; + virtual void emitMachine(StringRef CPU) = 0; }; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 8879630..2f4d5c1 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -19,8 +19,8 @@ #include "PPCTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -32,7 +32,7 @@ void initializePPCTTIPass(PassRegistry &); namespace { -class PPCTTI : public ImmutablePass, public TargetTransformInfo { +class PPCTTI final : public ImmutablePass, public TargetTransformInfo { const PPCTargetMachine *TM; const PPCSubtarget *ST; const PPCTargetLowering *TLI; @@ -52,15 +52,11 @@ public: initializePPCTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + virtual void initializePass() override { pushTTIStack(this); } - virtual void finalizePass() { - popTTIStack(); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -68,7 +64,7 @@ public: static char ID; /// 
Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + virtual void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; @@ -76,31 +72,33 @@ public: /// \name Scalar TTI Implementations /// @{ - virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + virtual PopcntSupportKind + getPopcntSupport(unsigned TyWidth) const override; + virtual void getUnrollingPreferences( + Loop *L, UnrollingPreferences &UP) const override; /// @} /// \name Vector TTI Implementations /// @{ - virtual unsigned getNumberOfRegisters(bool Vector) const; - virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getNumberOfRegisters(bool Vector) const override; + virtual unsigned getRegisterBitWidth(bool Vector) const override; + virtual unsigned getMaximumUnrollFactor() const override; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const; + OperandValueKind) const override; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; + int Index, Type *SubTp) const override; virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; + Type *Src) const override; virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; + Type *CondTy) const override; virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; + unsigned Index) const override; virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace) const override; /// @} }; @@ -141,7 +139,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { 
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasAltivec()) return 0; - return 32; + return ST->hasVSX() ? 64 : 32; } unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { @@ -210,6 +208,14 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { + // Double-precision scalars are already located in index #0. + if (Index == 0) + return 0; + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); + } + // Estimated cost of a load-hit-store delay. This was obtained // experimentally as a minimum needed to prevent unprofitable // vectorization for the paq8p benchmark. It may need to be @@ -235,14 +241,16 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); - // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; + unsigned Cost = + TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + + // FIXME: Update this for VSX loads/stores that support unaligned access. // PPC in general does not support unaligned loads and stores. They'll need // to be decomposed based on the alignment factor. unsigned SrcBytes = LT.second.getStoreSize(); if (SrcBytes && Alignment && Alignment < SrcBytes) - Cost *= (SrcBytes/Alignment); + Cost += LT.first*(SrcBytes/Alignment-1); return Cost; } diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt index fdb8a62..c9548c7 100644 --- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMPowerPCInfo PowerPCTargetInfo.cpp ) - -add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index f77d85b..4102346 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCInfo parent = PowerPC -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = PowerPC -- cgit v1.1