aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-04-23 16:57:46 -0700
committerStephen Hines <srhines@google.com>2014-04-24 15:53:16 -0700
commit36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
treee6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Target/PowerPC
parent69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
downloadexternal_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/AsmParser/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp265
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt4
-rw-r--r--lib/Target/PowerPC/Disassembler/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/Disassembler/Makefile16
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp345
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt4
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp18
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp51
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp100
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp23
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp144
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp62
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h7
-rw-r--r--lib/Target/PowerPC/Makefile5
-rw-r--r--lib/Target/PowerPC/PPC.h7
-rw-r--r--lib/Target/PowerPC/PPC.td49
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp184
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp9
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp42
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td20
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp3
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp174
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp56
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp208
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h21
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp699
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1065
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h27
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td430
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td174
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td205
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp907
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h15
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td1297
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td816
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp5
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp56
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp156
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h13
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td86
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td981
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td1118
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td246
-rw-r--r--lib/Target/PowerPC/PPCScheduleE500mc.td493
-rw-r--r--lib/Target/PowerPC/PPCScheduleE5500.td592
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td119
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td147
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td168
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td180
-rw-r--r--lib/Target/PowerPC/PPCScheduleP7.td385
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp23
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h27
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp64
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.cpp30
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.h10
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp56
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt4
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt2
69 files changed, 9136 insertions, 3343 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
index 3aa59c0..408858e 100644
--- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt
+++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
@@ -1,8 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCAsmParser
PPCAsmParser.cpp
)
-
-add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index fe83fe1..8bb91cf 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -9,21 +9,22 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCMCExpr.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -94,6 +95,44 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned VSRegs[64] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+static unsigned VSFRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -177,6 +216,7 @@ class PPCAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
const MCInstrInfo &MII;
bool IsPPC64;
+ bool IsDarwin;
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -185,6 +225,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
+ bool isDarwin() const { return IsDarwin; }
bool MatchRegisterName(const AsmToken &Tok,
unsigned &RegNo, int64_t &IntVal);
@@ -195,12 +236,14 @@ class PPCAsmParser : public MCTargetAsmParser {
PPCMCExpr::VariantKind &Variant);
const MCExpr *FixupVariantKind(const MCExpr *E);
bool ParseExpression(const MCExpr *&EVal);
+ bool ParseDarwinExpression(const MCExpr *&EVal);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
bool ParseDirectiveMachine(SMLoc L);
+ bool ParseDarwinDirectiveMachine(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -227,6 +270,7 @@ public:
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
+ IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
@@ -339,6 +383,11 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getVSReg() const {
+ assert(isVSRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -356,6 +405,7 @@ public:
bool isToken() const { return Kind == Token; }
bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
@@ -376,6 +426,7 @@ public:
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
bool isCCRegNumber() const { return (Kind == Expression
&& isUInt<3>(getExprCRVal())) ||
(Kind == Immediate
@@ -442,6 +493,16 @@ public:
Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
}
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ }
+
+ void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
@@ -867,7 +928,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ Out.EmitInstruction(Inst, STI);
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -1081,10 +1142,16 @@ FixupVariantKind(const MCExpr *E) {
llvm_unreachable("Invalid expression kind!");
}
-/// Parse an expression. This differs from the default "parseExpression"
-/// in that it handles complex \code @l/@ha \endcode modifiers.
+/// ParseExpression. This differs from the default "parseExpression" in that
+/// it handles modifiers.
bool PPCAsmParser::
ParseExpression(const MCExpr *&EVal) {
+
+ if (isDarwin())
+ return ParseDarwinExpression(EVal);
+
+ // (ELF Platforms)
+ // Handle \code @l/@ha \endcode
if (getParser().parseExpression(EVal))
return true;
@@ -1098,6 +1165,55 @@ ParseExpression(const MCExpr *&EVal) {
return false;
}
+/// ParseDarwinExpression. (MachO Platforms)
+/// This differs from the default "parseExpression" in that it handles detection
+/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
+/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
+/// syntax form so it is done here. TODO: Determine if there is merit in arranging
+/// for this to be done at a higher level.
+bool PPCAsmParser::
+ParseDarwinExpression(const MCExpr *&EVal) {
+ PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
+ switch (getLexer().getKind()) {
+ default:
+ break;
+ case AsmToken::Identifier:
+ // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
+ // something starting with any other char should be part of the
+ // asm syntax. If handwritten asm includes an identifier like lo16,
+ // then all bets are off - but no-one would do that, right?
+ StringRef poss = Parser.getTok().getString();
+ if (poss.equals_lower("lo16")) {
+ Variant = PPCMCExpr::VK_PPC_LO;
+ } else if (poss.equals_lower("hi16")) {
+ Variant = PPCMCExpr::VK_PPC_HI;
+ } else if (poss.equals_lower("ha16")) {
+ Variant = PPCMCExpr::VK_PPC_HA;
+ }
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ Parser.Lex(); // Eat the xx16
+ if (getLexer().isNot(AsmToken::LParen))
+ return Error(Parser.getTok().getLoc(), "expected '('");
+ Parser.Lex(); // Eat the '('
+ }
+ break;
+ }
+
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+ Parser.Lex(); // Eat the ')'
+ EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext());
+ }
+ return false;
+}
+
+/// ParseOperand
+/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
+/// rNN for MachO.
bool PPCAsmParser::
ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
@@ -1121,14 +1237,31 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
return Error(S, "invalid register name");
+ case AsmToken::Identifier:
+ // Note that non-register-name identifiers from the compiler will begin
+ // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
+ // identifiers like r31foo - so we fall through in the event that parsing
+ // a register name fails.
+ if (isDarwin()) {
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+ Operands.push_back(Op);
+ return false;
+ }
+ }
+ // Fall-through to process non-register-name identifiers as expression.
// All other expressions
case AsmToken::LParen:
case AsmToken::Plus:
case AsmToken::Minus:
case AsmToken::Integer:
- case AsmToken::Identifier:
case AsmToken::Dot:
case AsmToken::Dollar:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
if (!ParseExpression(EVal))
break;
/* fall through */
@@ -1177,11 +1310,25 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
break;
case AsmToken::Integer:
- if (getParser().parseAbsoluteExpression(IntVal) ||
+ if (!isDarwin()) {
+ if (getParser().parseAbsoluteExpression(IntVal) ||
IntVal < 0 || IntVal > 31)
return Error(S, "invalid register number");
+ } else {
+ return Error(S, "unexpected integer value");
+ }
break;
+ case AsmToken::Identifier:
+ if (isDarwin()) {
+ unsigned RegNo;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ break;
+ }
+ }
+ // Fall-through..
+
default:
return Error(S, "invalid memory operand");
}
@@ -1261,14 +1408,19 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
/// ParseDirective parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal == ".word")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- if (IDVal == ".llong")
- return ParseDirectiveWord(8, DirectiveID.getLoc());
- if (IDVal == ".tc")
- return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
- if (IDVal == ".machine")
- return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (!isDarwin()) {
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ if (IDVal == ".llong")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ if (IDVal == ".tc")
+ return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
+ } else {
+ if (IDVal == ".machine")
+ return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
+ }
return true;
}
@@ -1279,7 +1431,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
for (;;) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
- return true;
+ return false;
getParser().getStreamer().EmitValue(Value, Size);
@@ -1303,8 +1455,10 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
Parser.Lex();
// Align to word size.
@@ -1314,12 +1468,14 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
return ParseDirectiveWord(Size, L);
}
-/// ParseDirectiveMachine
+/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
if (getLexer().isNot(AsmToken::Identifier) &&
- getLexer().isNot(AsmToken::String))
- return Error(L, "unexpected token in directive");
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
StringRef CPU = Parser.getTok().getIdentifier();
Parser.Lex();
@@ -1329,11 +1485,56 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
// Implement ".machine any" (by doing nothing) for the benefit
// of existing assembler code. Likewise, we can then implement
// ".machine push" and ".machine pop" as no-op.
- if (CPU != "any" && CPU != "push" && CPU != "pop")
- return Error(L, "unrecognized machine type");
+ if (CPU != "any" && CPU != "push" && CPU != "pop") {
+ Error(L, "unrecognized machine type");
+ return false;
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(L, "unexpected token in directive");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitMachine(CPU);
+
+ return false;
+}
+
+/// ParseDarwinDirectiveMachine (Mach-o platforms)
+/// ::= .machine cpu-identifier
+bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: this is only the 'default' set of cpu variants.
+ // However we don't act on this information at present, this is simply
+ // allowing parsing to proceed with minimal sanity checking.
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") {
+ Error(L, "unrecognized cpu type");
+ return false;
+ }
+
+ if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) {
+ Error(L, "wrong cpu type specified for 64bit");
+ return false;
+ }
+ if (!isPPC64() && CPU == "ppc64") {
+ Error(L, "wrong cpu type specified for 32bit");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
return false;
}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 9a763f5..ea4de63 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
@@ -34,9 +35,8 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
-add_dependencies(LLVMPowerPCCodeGen PowerPCCommonTableGen intrinsics_gen)
-
add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..ca457df
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMPowerPCDisassembler
+ PPCDisassembler.cpp
+ )
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..7f29040
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCDisassembler
+parent = PowerPC
+required_libraries = MC Support PowerPCDesc PowerPCInfo
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/Makefile b/lib/Target/PowerPC/Disassembler/Makefile
new file mode 100644
index 0000000..86e3b47
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDisassembler
+
+# Hack: we need to include 'main' PPC target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
new file mode 100644
index 0000000..c4a7544
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -0,0 +1,345 @@
+//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class PPCDisassembler : public MCDisassembler {
+public:
+ PPCDisassembler(const MCSubtargetInfo &STI)
+ : MCDisassembler(STI) {}
+ virtual ~PPCDisassembler() {}
+
+ // Override MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createPPCDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new PPCDisassembler(STI);
+}
+
+extern "C" void LLVMInitializePowerPCDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(ThePPC32Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
+ createPPCDisassembler);
+}
+
+// FIXME: These can be generated by TableGen from the existing register
+// encoding values!
+
+static const unsigned CRRegs[] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+static const unsigned CRBITRegs[] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+
+static const unsigned FRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+
+static const unsigned VRegs[] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+static const unsigned VSRegs[] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
+static const unsigned VSFRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
+static const unsigned GPRegs[] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned GP0Regs[] = {
+ PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned G8Regs[] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
+template <std::size_t N>
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned (&Regs)[N]) {
+ assert(RegNo < N && "Invalid register number");
+ Inst.addOperand(MCOperand::CreateReg(Regs[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
+static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRBITRegs);
+}
+
+static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VRegs);
+}
+
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRegs);
+}
+
+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GPRegs);
+}
+
+static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GP0Regs);
+}
+
+static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G8Regs);
+}
+
+#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
+#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memri field (imm, reg), which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 16;
+ uint64_t Disp = Imm & 0xFFFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ switch (Inst.getOpcode()) {
+ default: break;
+ case PPC::LBZU:
+ case PPC::LHAU:
+ case PPC::LHZU:
+ case PPC::LWZU:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ case PPC::STBU:
+ case PPC::STHU:
+ case PPC::STWU:
+ case PPC::STFSU:
+ case PPC::STFDU:
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memrix field (imm, reg), which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 14;
+ uint64_t Disp = Imm & 0x3FFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ if (Inst.getOpcode() == PPC::LDU)
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ else if (Inst.getOpcode() == PPC::STDU)
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // The cr bit encoding is 0x80 >> cr_reg_num.
+
+ unsigned Zeros = countTrailingZeros(Imm);
+ assert(Zeros < 8 && "Invalid CR bit value");
+
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros]));
+ return MCDisassembler::Success;
+}
+
+#include "PPCGenDisassemblerTables.inc"
+
+DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ // Get the four bytes of the instruction.
+ uint8_t Bytes[4];
+ Size = 4;
+ if (Region.readBytes(Address, Size, Bytes) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // The instruction is big-endian encoded.
+ uint32_t Inst = (Bytes[0] << 24) |
+ (Bytes[1] << 16) |
+ (Bytes[2] << 8) |
+ (Bytes[3] << 0);
+
+ return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
+}
+
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index a605cc4..ab30a11 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 8281b5c..dc54b52 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -149,6 +149,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
case PPC::PRED_NU:
O << "nu";
return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Invalid predicate code");
}
@@ -184,6 +187,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
case PPC::PRED_NU_PLUS:
O << "+";
return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Invalid predicate code");
}
@@ -193,6 +199,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -310,7 +323,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8a4c03d..4d1df78 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = 0);
-
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 7b3e843..9d173d6 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -16,18 +16,20 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = PowerPC
parent = Target
+has_asmparser = 1
has_asmprinter = 1
+has_disassembler = 1
has_jit = 1
[component_1]
type = Library
name = PowerPCCodeGen
parent = PowerPC
-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index 3efa5ec..3cea65e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -8,5 +8,3 @@ add_llvm_library(LLVMPowerPCDesc
PPCMachObjectWriter.cpp
PPCELFObjectWriter.cpp
)
-
-add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 0d42081..f7309bb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -71,14 +71,16 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
namespace {
class PPCAsmBackend : public MCAsmBackend {
-const Target &TheTarget;
+ const Target &TheTarget;
+ bool IsLittleEndian;
public:
- PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
+ PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T),
+ IsLittleEndian(isLittle) {}
unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
@@ -88,17 +90,27 @@ public:
{ "fixup_ppc_half16ds", 0, 14, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
+ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 2, 24, 0 },
+ { "fixup_ppc_brcond14abs", 2, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
+ uint64_t Value, bool IsPCRel) const {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -108,8 +120,10 @@ public:
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff);
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
+ Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ }
}
bool mayNeedRelaxation(const MCInst &Inst) const {
@@ -152,6 +166,10 @@ public:
assert(Name == "ppc32" && "Unknown target name!");
return 4;
}
+
+ bool isLittleEndian() const {
+ return IsLittleEndian;
+ }
};
} // end anonymous namespace
@@ -160,7 +178,7 @@ public:
namespace {
class DarwinPPCAsmBackend : public PPCAsmBackend {
public:
- DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
@@ -170,26 +188,18 @@ namespace {
(is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
MachO::CPU_SUBTYPE_POWERPC_ALL);
}
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
};
class ELFPPCAsmBackend : public PPCAsmBackend {
uint8_t OSABI;
public:
- ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
- PPCAsmBackend(T), OSABI(OSABI) { }
+ ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
+ PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createPPCELFObjectWriter(OS, is64, OSABI);
- }
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
+ return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
};
@@ -202,5 +212,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
return new DarwinPPCAsmBackend(T);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFPPCAsmBackend(T, OSABI);
+ bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le;
+ return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 54de70e..d19f6a0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -27,17 +28,8 @@ namespace {
virtual unsigned getRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const;
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend) const;
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
- virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
};
}
@@ -49,12 +41,38 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
PPCELFObjectWriter::~PPCELFObjectWriter() {
}
+static MCSymbolRefExpr::VariantKind getAccessVariant(const MCFixup &Fixup) {
+ const MCExpr *Expr = Fixup.getValue();
+
+ if (Expr->getKind() != MCExpr::Target)
+ return Fixup.getAccessVariant();
+
+ switch (cast<PPCMCExpr>(Expr)->getKind()) {
+ case PPCMCExpr::VK_PPC_None:
+ return MCSymbolRefExpr::VK_None;
+ case PPCMCExpr::VK_PPC_LO:
+ return MCSymbolRefExpr::VK_PPC_LO;
+ case PPCMCExpr::VK_PPC_HI:
+ return MCSymbolRefExpr::VK_PPC_HI;
+ case PPCMCExpr::VK_PPC_HA:
+ return MCSymbolRefExpr::VK_PPC_HA;
+ case PPCMCExpr::VK_PPC_HIGHERA:
+ return MCSymbolRefExpr::VK_PPC_HIGHERA;
+ case PPCMCExpr::VK_PPC_HIGHER:
+ return MCSymbolRefExpr::VK_PPC_HIGHER;
+ case PPCMCExpr::VK_PPC_HIGHEST:
+ return MCSymbolRefExpr::VK_PPC_HIGHEST;
+ case PPCMCExpr::VK_PPC_HIGHESTA:
+ return MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ }
+ llvm_unreachable("unknown PPCMCExpr kind");
+}
+
unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const
{
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Fixup);
// determine the type of the relocation
unsigned Type;
@@ -356,64 +374,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) const {
+ bool IsPCRel) const {
return getRelocTypeInner(Target, Fixup, IsPCRel);
}
-const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- assert(Target.getSymA() && "SymA cannot be 0");
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- bool EmitThisSym;
- switch (Modifier) {
- // GOT references always need a relocation, even if the
- // target symbol is local.
- case MCSymbolRefExpr::VK_GOT:
- case MCSymbolRefExpr::VK_PPC_GOT_LO:
- case MCSymbolRefExpr::VK_PPC_GOT_HI:
- case MCSymbolRefExpr::VK_PPC_GOT_HA:
- EmitThisSym = true;
- break;
- default:
- EmitThisSym = false;
- break;
- }
-
- if (EmitThisSym)
- return &Target.getSymA()->getSymbol().AliasedSymbol();
- return NULL;
-}
-
-const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- assert(Target.getSymA() && "SymA cannot be 0");
- const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
-
- unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel);
-
- // The .odp creation emits a relocation against the symbol ".TOC." which
- // create a R_PPC64_TOC relocation. However the relocation symbol name
- // in final object creation should be NULL, since the symbol does not
- // really exist, it is just the reference to TOC base for the current
- // object file.
- bool EmitThisSym = RelocType != ELF::R_PPC64_TOC;
-
- if (EmitThisSym && !Symbol.isTemporary())
- return &Symbol;
- return NULL;
-}
-
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
+ bool IsLittleEndian,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index f3dddce..18609e1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
-PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
@@ -30,22 +32,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
AssemblerDialect = 1; // New-Style mnemonics.
SupportsDebugInformation= true; // Debug information.
+
+ // The installed assembler for OSX < 10.6 lacks some directives.
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
+
+ UseIntegratedAssembler = true;
}
void PPCLinuxMCAsmInfo::anchor() { }
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
- IsLittleEndian = false;
+ IsLittleEndian = T.getArch() == Triple::ppc64le;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
CommentString = "#";
- GlobalPrefix = "";
- PrivateGlobalPrefix = ".L";
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
@@ -65,5 +73,10 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
AssemblerDialect = 1; // New-Style mnemonics.
+
+ if (T.getOS() == llvm::Triple::FreeBSD ||
+ (T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
+ (T.getOS() == llvm::Triple::OpenBSD && !is64Bit))
+ UseIntegratedAssembler = true;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 1530e77..cee2cb7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -18,17 +18,18 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
virtual void anchor();
public:
- explicit PPCMCAsmInfoDarwin(bool is64Bit);
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
- explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 346a9be..b259c5d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -33,70 +33,85 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
const MCContext &CTX;
- Triple TT;
+ bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
+ : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
}
~PPCMCCodeEmitter() {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// For fast-isel, a float COPY_TO_REGCLASS can survive this long.
// It's just a nop to keep the register classes happy, so don't
// generate anything.
unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
return;
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
- unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
- if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
- Opcode == PPC::BL8_NOP_TLS)
- Size = 8;
-
- // Output the constant in big endian byte order.
- int ShiftValue = (Size * 8) - 8;
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)(Bits >> ShiftValue);
- Bits <<= 8;
+ // Output the constant in big/little endian byte order.
+ unsigned Size = Desc.getSize();
+ if (IsLittleEndian) {
+ for (unsigned i = 0; i != Size; ++i) {
+ OS << (char)Bits;
+ Bits >>= 8;
+ }
+ } else {
+ int ShiftValue = (Size * 8) - 8;
+ for (unsigned i = 0; i != Size; ++i) {
+ OS << (char)(Bits >> ShiftValue);
+ Bits <<= 8;
+ }
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
@@ -110,14 +125,17 @@ MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new PPCMCCodeEmitter(MCII, STI, Ctx);
+ Triple TT(STI.getTargetTriple());
+ bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
+ return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
}
unsigned PPCMCCodeEmitter::
getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -126,9 +144,10 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -138,9 +157,10 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -150,9 +170,10 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -161,79 +182,87 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return 0;
}
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Encode (imm, reg) as a memri, which has the low 16-bits as the
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+ return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return RegBits;
}
unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Encode (imm, reg) as a memrix, which has the low 14-bits as the
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits;
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
return RegBits;
}
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
- return CTX.getRegisterInfo()->getEncodingValue(PPC::X13);
+ Triple TT(STI.getTargetTriple());
+ bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
}
unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// For special TLS calls, we need two fixups; one for the branch target
// (__tls_get_addr), which we create via getDirectBrEncoding as usual,
// and one for the TLSGD or TLSLD symbol, which is emitted here.
const MCOperand &MO = MI.getOperand(OpNo+1);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
- return getDirectBrEncoding(MI, OpNo, Fixups);
+ return getDirectBrEncoding(MI, OpNo, Fixups, STI);
}
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
@@ -244,7 +273,8 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
if (MO.isReg()) {
// MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index d7e8402..c181e03 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -9,9 +9,9 @@
#define DEBUG_TYPE "ppcmcexpr"
#include "PPCMCExpr.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
@@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const {
MCValue Value;
- if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
return false;
if (Value.isAbsolute()) {
@@ -86,6 +86,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
}
Res = MCValue::get(Result);
} else {
+ if (!Layout)
+ return false;
+
MCContext &Context = Layout->getAssembler().getContext();
const MCSymbolRefExpr *Sym = Value.getSymA();
MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index e44c7c1..5fc7918 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -10,9 +10,9 @@
#ifndef PPCMCEXPR_H
#define PPCMCEXPR_H
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmLayout.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f18d095..105c511 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -39,6 +39,7 @@ using namespace llvm;
// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() {}
+PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
@@ -72,9 +73,9 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
- MAI = new PPCMCAsmInfoDarwin(isPPC64);
+ MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
- MAI = new PPCLinuxMCAsmInfo(isPPC64);
+ MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
@@ -112,7 +113,8 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
- PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
+ PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : PPCTargetStreamer(S), OS(OS) {}
virtual void emitTCEntry(const MCSymbol &S) {
OS << "\t.tc ";
OS << S.getName();
@@ -120,12 +122,33 @@ public:
OS << S.getName();
OS << '\n';
}
+ virtual void emitMachine(StringRef CPU) {
+ OS << "\t.machine " << CPU << '\n';
+ }
};
class PPCTargetELFStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
virtual void emitTCEntry(const MCSymbol &S) {
// Creates a R_PPC64_TOC relocation
- Streamer->EmitSymbolValue(&S, 8);
+ Streamer.EmitSymbolValue(&S, 8);
+ }
+ virtual void emitMachine(StringRef CPU) {
+ // FIXME: Is there anything to do in here or does this directive only
+ // limit the parser?
+ }
+};
+
+class PPCTargetMachOStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ virtual void emitTCEntry(const MCSymbol &S) {
+ llvm_unreachable("Unknown pseudo-op: .tc");
+ }
+ virtual void emitMachine(StringRef CPU) {
+ // FIXME: We should update the CPUType, CPUSubType in the Object file if
+ // the new values are different from the defaults.
}
};
}
@@ -135,25 +158,32 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI,
bool RelaxAll,
bool NoExecStack) {
- if (Triple(TT).isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ if (Triple(TT).isOSDarwin()) {
+ MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ new PPCTargetMachOStreamer(*S);
+ return S;
+ }
- PPCTargetStreamer *S = new PPCTargetELFStreamer();
- return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ MCStreamer *S =
+ createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ new PPCTargetELFStreamer(*S);
+ return S;
}
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc, bool useCFI,
- bool useDwarfDirectory, MCInstPrinter *InstPrint,
- MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
- PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
-
- return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
- useDwarfDirectory, InstPrint, CE, TAB,
- ShowInst);
+ bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+
+ MCStreamer *S =
+ llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
+ InstPrint, CE, TAB, ShowInst);
+ new PPCTargetAsmStreamer(*S, OS);
+ return S;
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 0b0ca24..474395b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -46,6 +46,7 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
+ bool IsLittleEndian,
uint8_t OSABI);
/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 63facc5..c2987b6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -42,6 +42,10 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS;
case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS;
case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS;
+
+ // Simple predicates for single condition-register bits.
+ case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET;
+ case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET;
}
llvm_unreachable("Unknown PPC branch opcode!");
}
@@ -72,6 +76,10 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS;
case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS;
case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS;
+
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index d498c2f..10e328a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -48,7 +48,12 @@ namespace PPC {
PRED_GT_PLUS = (1 << 5) | 15,
PRED_NE_PLUS = (2 << 5) | 7,
PRED_UN_PLUS = (3 << 5) | 15,
- PRED_NU_PLUS = (3 << 5) | 7
+ PRED_NU_PLUS = (3 << 5) | 7,
+
+ // When dealing with individual condition-register bits, we have simple set
+ // and unset predicates.
+ PRED_BIT_SET = 1024,
+ PRED_BIT_UNSET = 1025
};
/// Invert the specified predicate. != -> ==, < -> >=.
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 21fdcd9..c966748 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
- PPCGenMCCodeEmitter.inc PPCGenFastISel.inc
+ PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
+ PPCGenDisassemblerTables.inc
-DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser Disassembler InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f0d5af2..c42c5be 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -23,6 +23,7 @@
namespace llvm {
class PPCTargetMachine;
+ class PassRegistry;
class FunctionPass;
class ImmutablePass;
class JITCodeEmitter;
@@ -35,6 +36,9 @@ namespace llvm {
FunctionPass *createPPCCTRLoopsVerify();
#endif
FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXCopyCleanupPass();
+ FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -45,6 +49,9 @@ namespace llvm {
/// \brief Creates an PPC-specific Target Transformation Info pass.
ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
+ void initializePPCVSXFMAMutatePass(PassRegistry&);
+ extern char &PPCVSXFMAMutateID;
+
namespace PPCII {
/// Target Operand Flag enum.
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 54e3d40..bd58539 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -51,6 +51,8 @@ def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
+ "Use condition-register bits individually">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
@@ -88,7 +90,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
- "Enable VSX instructions">;
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -110,6 +113,12 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// their record-form variants.
class RecFormRel;
+// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
+// FMA instruction forms with their corresponding factor-killing forms.
+class AltVSXFMARel {
+ bit IsVSXFMAAlt = 0;
+}
+
//===----------------------------------------------------------------------===//
// Relation Map Definitions.
//===----------------------------------------------------------------------===//
@@ -140,6 +149,19 @@ def getNonRecordFormOpcode : InstrMapping {
let ValueCols = [["0"]];
}
+def getAltVSXFMAOpcode : InstrMapping {
+ let FilterClass = "AltVSXFMARel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["IsVSXFMAAlt"];
+ // The key column are the (default) addend-killing instructions.
+ let KeyCol = ["0"];
+ // Value columns IsVSXFMAAlt=1
+ let ValueCols = [["1"]];
+}
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -153,12 +175,12 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
-def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
- FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
-def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
- FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
+def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -254,7 +276,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", G5Model,
+def : ProcessorModel<"pwr7", P7Model,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
@@ -283,11 +305,11 @@ include "PPCCallingConv.td"
def PPCInstrInfo : InstrInfo {
let isLittleEndianEncoding = 1;
-}
-def PPCAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- bit isMCAsmWriter = 1;
+ // FIXME: Unset this when no longer needed!
+ let decodePositionallyEncodedOperands = 1;
+
+ let noNamedPositionallyEncodedOperands = 1;
}
def PPCAsmParser : AsmParser {
@@ -306,8 +328,7 @@ def PPCAsmParserVariant : AsmParserVariant {
def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
-
- let AssemblyWriters = [PPCAsmWriter];
+
let AssemblyParsers = [PPCAsmParser];
let AssemblyParserVariants = [PPCAsmParserVariant];
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ada34ed..9ce8ea9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -19,24 +19,24 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "MCTargetDesc/PPCPredicates.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -54,7 +54,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -130,7 +129,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
@@ -139,6 +141,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -157,37 +160,13 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- // FIXME: PIC relocation model
- return;
case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
O << *GetBlockAddressSymbol(MO.getBlockAddress());
return;
- case MachineOperand::MO_ExternalSymbol: {
- // Computing the address of an external symbol, not calling it.
- if (TM.getRelocationModel() == Reloc::Static) {
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- return;
- }
-
- MCSymbol *NLPSym =
- OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
- MO.getSymbolName()+"$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
-
- O << *NLPSym;
- return;
- }
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
const GlobalValue *GV = MO.getGlobal();
@@ -197,7 +176,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (TM.getRelocationModel() != Reloc::Static &&
(GV->isDeclaration() || GV->isWeakForLinker())) {
if (!GV->hasHiddenVisibility()) {
- SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>()
.getGVStubEntry(SymToPrint);
@@ -206,7 +185,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
- SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().
@@ -305,12 +284,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
-
+ const DataLayout *DL = TM.getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid name clash check if the name already exists.
while (TOCEntry == 0) {
- if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
"C" + Twine(TOCLabelID++)) == 0) {
TOCEntry = GetTempSymbol("C", TOCLabelID);
}
@@ -325,6 +304,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
+ bool isPPC64 = Subtarget.isPPC64();
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
@@ -340,7 +320,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
@@ -376,7 +356,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
@@ -401,8 +381,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalValue *RealGValue =
+ GAlias ? GAlias->getAliasedGlobal() : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
@@ -422,7 +402,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::LDtocL: {
@@ -448,8 +428,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalValue *RealGValue =
+ GAlias ? GAlias->getAliasedGlobal() : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
@@ -463,7 +443,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
@@ -483,8 +463,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalValue *RealGValue =
+ GAlias ? GAlias->getAliasedGlobal() : GValue;
MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
@@ -499,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::ADDISgotTprelHA: {
@@ -512,18 +492,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTprel));
return;
}
- case PPC::LDgotTprelL: {
+ case PPC::LDgotTprelL:
+ case PPC::LDgotTprelL32: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LD.
- TmpInst.setOpcode(PPC::LD);
+ TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -531,7 +512,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::PPC32GOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
+ OutContext);
+ const MCExpr *SymGotTlsHA =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
return;
}
case PPC::ADDIStlsgdHA: {
@@ -544,7 +543,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsGD));
@@ -560,7 +559,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
@@ -581,7 +580,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -596,7 +595,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsLD));
@@ -612,7 +611,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
@@ -633,7 +632,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -648,7 +647,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X3)
.addExpr(SymDtprel));
@@ -664,7 +663,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -679,7 +678,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
.addReg(MI->getOperand(0).getReg()));
return;
}
@@ -695,19 +694,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
->getEncodingValue(MI->getOperand(0).getReg());
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
.addImm(Mask)
.addReg(MI->getOperand(1).getReg()));
return;
}
break;
- case PPC::SYNC:
- // In Book E sync is called msync, handle this special case here...
- if (Subtarget.isBookE()) {
- OutStreamer.EmitRawText(StringRef("\tmsync"));
- return;
- }
- break;
case PPC::LD:
case PPC::STD:
case PPC::LWA_32:
@@ -730,7 +722,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
@@ -773,7 +765,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
@@ -861,13 +853,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
-
- // FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport()) {
- assert(Directive < array_lengthof(CPUDirectives) &&
- "CPUDirectives[] might not be up-to-date!");
- OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
- }
+
+ assert(Directive < array_lengthof(CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ TStreamer.emitMachine(CPUDirectives[Directive]);
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -877,14 +868,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
}
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
@@ -916,8 +907,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
if (TM.getRelocationModel() == Reloc::PIC_) {
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.SwitchSection(StubSection);
@@ -937,32 +928,32 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// bcl 20, 31, AnonSymbol
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
// mflr r11
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
.addExpr(SubHa16));
// mtlr r0
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
@@ -984,8 +975,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
@@ -1001,7 +992,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
@@ -1009,15 +1000,15 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
.addReg(PPC::R11));
// mtctr r12
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
@@ -1058,7 +1049,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
E = Personalities.end(); I != E; ++I) {
if (*I) {
- MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
@@ -1147,4 +1138,5 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
}
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 3e608ca..9276211 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -115,6 +115,9 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock *Dest = 0;
if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
Dest = I->getOperand(2).getMBB();
+ else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
+ !I->getOperand(1).isImm())
+ Dest = I->getOperand(1).getMBB();
else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
!I->getOperand(0).isImm())
@@ -166,6 +169,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BC) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BCn) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ) {
BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ8) {
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4224ae2..9c5db50 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -26,28 +26,28 @@
#define DEBUG_TYPE "ctrloops"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "PPCTargetMachine.h"
-#include "PPC.h"
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
@@ -96,8 +96,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
@@ -109,7 +109,7 @@ namespace {
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
};
@@ -145,7 +145,7 @@ namespace {
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
@@ -170,8 +170,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
bool MadeChange = false;
@@ -186,6 +187,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
return MadeChange;
}
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+ return false;
+}
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
@@ -352,13 +360,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
return true;
- } else if (TT.isArch32Bit() &&
- J->getType()->getScalarType()->isIntegerTy(64) &&
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
(J->getOpcode() == Instruction::UDiv ||
J->getOpcode() == Instruction::SDiv ||
J->getOpcode() == Instruction::URem ||
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index e8e7f4c..d48164d 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -15,6 +15,8 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
: CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+class CCIfNotSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
@@ -23,7 +25,8 @@ class CCIfSubtarget<string F, CCAction A>
// Return-value convention for PowerPC
def RetCC_PPC : CallingConv<[
// On PPC64, integer return values are always promoted to i64
- CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
@@ -33,7 +36,8 @@ def RetCC_PPC : CallingConv<[
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
+ CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
]>;
@@ -46,6 +50,7 @@ def RetCC_PPC : CallingConv<[
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
@@ -58,6 +63,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
@@ -65,7 +71,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
+ CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
]>;
//===----------------------------------------------------------------------===//
@@ -73,6 +80,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_PPC32_SVR4_Common : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
@@ -97,7 +106,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
// This calling convention puts vector arguments always on the stack. It is used
@@ -113,6 +122,9 @@ def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
+ VSH10, VSH11, VSH12, VSH13]>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 418736e..84fc888 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -121,7 +121,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
break;
- case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ break;
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 09117e7..dd45683 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -15,10 +15,10 @@
#define DEBUG_TYPE "ppcfastisel"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
@@ -28,12 +28,12 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -80,7 +80,7 @@ typedef struct Address {
}
} Address;
-class PPCFastISel : public FastISel {
+class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const TargetInstrInfo &TII;
@@ -127,7 +127,6 @@ class PPCFastISel : public FastISel {
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
- bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
bool SelectIToFP(const Instruction *I, bool IsSigned);
@@ -325,11 +324,11 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
II != IE; ++II, ++GTI) {
const Value *Op = *II;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
@@ -407,7 +406,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
// register and continue. This should almost never happen.
if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
Addr.Base.Reg = ResultReg;
Addr.BaseType = Address::RegBase;
@@ -499,13 +498,13 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
@@ -529,7 +528,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
case PPC::LFS: Opc = PPC::LFSX; break;
case PPC::LFD: Opc = PPC::LFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Addr.Base.Reg).addReg(IndexReg);
}
@@ -615,12 +614,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
- .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg)
+ .addImm(Addr.Offset)
+ .addFrameIndex(Addr.Base.FI)
+ .addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
@@ -640,7 +642,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
}
@@ -704,9 +706,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
CondReg))
return false;
- BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- FastEmitBranch(FBB, DL);
+ FastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -714,7 +716,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DL);
+ FastEmitBranch(Target, DbgLoc);
return true;
}
@@ -737,6 +739,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
+ if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits())
+ return false;
+
// See if operand 2 is an immediate encodeable in the compare.
// FIXME: Operands are not in canonical order at -O0, so an immediate
// operand in position 1 is a lost opportunity for now. We are
@@ -811,10 +816,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
if (!UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addReg(SrcReg2);
else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addImm(Imm);
return true;
@@ -853,7 +858,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
UpdateValueMap(I, DestReg);
@@ -892,11 +897,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
unsigned LoadOpc = PPC::LFD;
if (SrcVT == MVT::i32) {
- Addr.Offset = 4;
- if (!IsSigned)
+ if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- else if (PPCSubTarget.hasLFIWAX())
+ Addr.Offset = 4;
+ } else if (PPCSubTarget.hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
+ Addr.Offset = 4;
+ }
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
@@ -970,7 +977,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
// Generate the convert.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
UpdateValueMap(I, DestReg);
@@ -1042,7 +1049,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
if (InRC == &PPC::F4RCRegClass) {
unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
.addReg(SrcReg).addImm(PPC::F8RCRegClassID);
SrcReg = TmpReg;
@@ -1062,7 +1069,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
// Generate the convert.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
@@ -1155,8 +1162,10 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
if (UseImm) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
- .addReg(SrcReg1).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg)
+ .addReg(SrcReg1)
+ .addImm(Imm);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1171,7 +1180,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
if (ISDOpcode == ISD::SUB)
std::swap(SrcReg1, SrcReg2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
UpdateValueMap(I, ResultReg);
return true;
@@ -1198,7 +1207,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Skip vector arguments for now, as well as long double and
// uint128_t, and anything that isn't passed in a register.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
!VA.isRegLoc() || VA.needsCustom())
return false;
@@ -1211,7 +1220,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
.addImm(NumBytes);
@@ -1270,9 +1279,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
++NextGPR;
} else
ArgReg = NextGPR++;
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ArgReg).addReg(Arg);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
RegArgs.push_back(ArgReg);
}
@@ -1285,7 +1294,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg) {
// Issue CallSEQ_END.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameDestroyOpcode()))
.addImm(NumBytes).addImm(0);
@@ -1315,14 +1324,14 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
ResultReg = createResultReg(CpyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
ResultReg).addReg(SourcePhysReg);
// If only the low half of a general register is needed, generate
@@ -1333,7 +1342,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
}
@@ -1440,7 +1449,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
if (Arg == 0)
return false;
- unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
Args.push_back(*II);
@@ -1465,7 +1474,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
@@ -1522,8 +1531,8 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
const Constant *C = cast<Constant>(RV);
unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
unsigned RetReg = ValLocs[0].getLocReg();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- RetReg).addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
RetRegs.push_back(RetReg);
} else {
@@ -1578,14 +1587,14 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
}
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetRegs[i])
.addReg(SrcReg);
}
}
}
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
@@ -1615,7 +1624,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
Opc = PPC::EXTSW_32_64;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Unsigned 32-bit extensions use RLWINM.
@@ -1627,7 +1636,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
MB = 16;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
@@ -1640,7 +1649,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
MB = 48;
else
MB = 32;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::RLDICL_32_64), DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
}
@@ -1654,9 +1663,9 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
if (AddrReg == 0)
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
.addReg(AddrReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
@@ -1684,7 +1693,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
// The only interesting case is when we need to switch register classes.
if (SrcVT == MVT::i64) {
unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
ResultReg).addReg(SrcReg, 0, PPC::sub_32);
SrcReg = ResultReg;
}
@@ -1791,7 +1801,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
return 0;
// All FP constants are loaded from the constant pool.
- unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
assert(Align > 0 && "Unexpectedly missing alignment information!");
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -1807,25 +1817,25 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
TmpReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
if (CModel == CodeModel::Large) {
unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg2);
} else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
.addReg(TmpReg)
.addMemOperand(MMO);
@@ -1853,7 +1863,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
if (!GVar) {
// If GV is an alias, use the aliasee for determining thread-locality.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal());
}
// FIXME: We don't yet handle the complexity of TLS.
@@ -1863,8 +1873,10 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
- .addGlobalAddress(GV).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
+ DestReg)
+ .addGlobalAddress(GV)
+ .addReg(PPC::X2);
else {
// If the address is an externally defined symbol, a symbol with
// common or externally available linkage, a function address, or a
@@ -1875,7 +1887,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// ADDItocL(ADDIStocHA(%X2, GV), GV)
// Either way, start with the ADDIStocHA:
unsigned HighPartReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
// !GVar implies a function address. An external variable is one
@@ -1884,11 +1896,11 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// on the "if" path here.
if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
// Otherwise generate the ADDItocL.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
@@ -1906,21 +1918,21 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
if (isInt<16>(Imm))
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
.addImm(Imm);
else if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
.addImm(Hi);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
.addReg(TmpReg).addImm(Lo);
} else
// Just Hi bits.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
.addImm(Hi);
@@ -1960,7 +1972,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
unsigned TmpReg2;
if (Imm) {
TmpReg2 = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
} else
TmpReg2 = TmpReg1;
@@ -1968,14 +1980,14 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
unsigned TmpReg3, Hi, Lo;
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
TmpReg3 = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
TmpReg3).addReg(TmpReg2).addImm(Hi);
} else
TmpReg3 = TmpReg2;
if ((Lo = Remainder & 0xFFFF)) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
ResultReg).addReg(TmpReg3).addImm(Lo);
return ResultReg;
}
@@ -1987,6 +1999,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget.useCRBits()) {
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
@@ -2000,7 +2021,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
if (isInt<16>(CI->getSExtValue())) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
.addImm(CI->getSExtValue());
return ImmReg;
}
@@ -2049,7 +2070,7 @@ unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(SI->second).addImm(0);
return ResultReg;
}
@@ -2152,6 +2173,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget.useCRBits()) {
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
return 0;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0ac2ced..d8f491f 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -353,9 +353,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert((isDarwinABI || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
- // Prepare for frame info.
- MCSymbol *FrameLabel = 0;
-
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
if (!isSVR4ABI)
@@ -561,36 +558,37 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Add the "machine moves" for the instructions we generated above, but in
// reverse order.
if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
-
// Show update of SP.
assert(NegFrameSize);
- MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
if (HasFP) {
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
if (HasBP) {
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
if (MustSaveLR) {
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
- MCSymbol *ReadyLabel = 0;
-
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
BuildMI(MBB, MBBI, dl, OrInst, FPReg)
@@ -598,19 +596,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(SPReg);
if (needsFrameMoves) {
- ReadyLabel = MMI.getContext().CreateTempSymbol();
-
// Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
-
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
if (needsFrameMoves) {
- MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
-
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
@@ -631,14 +627,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
- MMI.addFrameInst(MCCFIInstruction::createOffset(
- Label, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- MMI.addFrameInst(MCCFIInstruction::createOffset(
- Label, MRI->getDwarfRegNum(Reg, true), Offset));
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 0df50e1..37c85b3 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -15,34 +15,220 @@
#include "PPCHazardRecognizers.h"
#include "PPC.h"
#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-//===----------------------------------------------------------------------===//
-// PowerPC Scoreboard Hazard Recognizer
-void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
+ // FIXME: Move this.
+ if (isBCTRAfterSet(SU))
+ return true;
+
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID)
- // This is a PPC pseudo-instruction.
- return;
+ return false;
+
+ if (!MCID->mayLoad())
+ return false;
+
+ // SU is a load; for any predecessors in this dispatch group, that are stores,
+ // and with which we have an ordering dependency, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || !PredMCID->mayStore())
+ continue;
+
+ if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->isBranch())
+ return false;
+
+ // SU is a branch; for any predecessors in this dispatch group, with which we
+ // have a data dependence and set the counter register, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
+ continue;
+
+ if (SU->Preds[i].isCtrl())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+// FIXME: Remove this when we don't need this:
+namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
+
+// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
+
+bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
+ unsigned &NSlots) {
+ // FIXME: Indirectly, this information is contained in the itinerary, and
+ // we should derive it from there instead of separately specifying it
+ // here.
+ unsigned IIC = MCID->getSchedClass();
+ switch (IIC) {
+ default:
+ NSlots = 1;
+ break;
+ case PPC::Sched::IIC_IntDivW:
+ case PPC::Sched::IIC_IntDivD:
+ case PPC::Sched::IIC_LdStLoadUpd:
+ case PPC::Sched::IIC_LdStLDU:
+ case PPC::Sched::IIC_LdStLFDU:
+ case PPC::Sched::IIC_LdStLFDUX:
+ case PPC::Sched::IIC_LdStLHA:
+ case PPC::Sched::IIC_LdStLHAU:
+ case PPC::Sched::IIC_LdStLWA:
+ case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTFDU:
+ NSlots = 2;
+ break;
+ case PPC::Sched::IIC_LdStLoadUpdX:
+ case PPC::Sched::IIC_LdStLDUX:
+ case PPC::Sched::IIC_LdStLHAUX:
+ case PPC::Sched::IIC_LdStLWARX:
+ case PPC::Sched::IIC_LdStLDARX:
+ case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTDCX:
+ case PPC::Sched::IIC_LdStSTWCX:
+ case PPC::Sched::IIC_BrMCRX: // mtcr
+ // FIXME: Add sync/isync (here and in the itinerary).
+ NSlots = 4;
+ break;
+ }
- ScoreboardHazardRecognizer::EmitInstruction(SU);
+ // FIXME: record-form instructions need a different itinerary class.
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
+ NSlots = 2;
+
+ switch (IIC) {
+ default:
+ // All multi-slot instructions must come first.
+ return NSlots > 1;
+ case PPC::Sched::IIC_BrCR: // cr logicals
+ case PPC::Sched::IIC_SprMFCR:
+ case PPC::Sched::IIC_SprMFCRF:
+ case PPC::Sched::IIC_SprMTSPR:
+ return true;
+ }
}
ScheduleHazardRecognizer::HazardType
-PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (Stalls == 0 && isLoadAfterStore(SU))
+ return NoopHazard;
+
return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
}
-void PPCScoreboardHazardRecognizer::AdvanceCycle() {
- ScoreboardHazardRecognizer::AdvanceCycle();
+bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ unsigned NSlots;
+ if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
+ return true;
+
+ return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
+}
+
+unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ // We only need to fill out a maximum of 5 slots here: The 6th slot could
+ // only be a second branch, and otherwise the next instruction will start a
+ // new group.
+ if (isLoadAfterStore(SU) && CurSlots < 6) {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If we're using a special group-terminating nop, then we need only one.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
+ return 1;
+
+ return 5 - CurSlots;
+ }
+
+ return ScoreboardHazardRecognizer::PreEmitNoops(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID) {
+ if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
+ SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+
+ unsigned NSlots;
+ bool MustBeFirst = mustComeFirst(MCID, NSlots);
+
+ // If this instruction must come first, but does not, then it starts a
+ // new group.
+ if (MustBeFirst && CurSlots) {
+ CurSlots = CurBranches = 0;
+ CurGroup.clear();
+ }
+
+ CurSlots += NSlots;
+ CurGroup.push_back(SU);
+
+ if (MCID->isBranch())
+ ++CurBranches;
+ }
+ }
+
+ return ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
+ return ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("Bottom-up scheduling not supported");
}
-void PPCScoreboardHazardRecognizer::Reset() {
- ScoreboardHazardRecognizer::Reset();
+void PPCDispatchGroupSBHazardRecognizer::Reset() {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ return ScoreboardHazardRecognizer::Reset();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If the group has now filled all of its slots, or if we're using a special
+ // group-terminating nop, the group is complete.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ CurSlots == 6) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ CurGroup.push_back(0);
+ ++CurSlots;
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 84b8e6d..6b7fe41 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -21,19 +21,30 @@
namespace llvm {
-/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
-/// hazard recognizer for generic PPC processors.
-class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
+/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for PPC ooo processors with dispatch-group hazards.
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
const ScheduleDAG *DAG;
+ SmallVector<SUnit *, 7> CurGroup;
+ unsigned CurSlots, CurBranches;
+
+ bool isLoadAfterStore(SUnit *SU);
+ bool isBCTRAfterSet(SUnit *SU);
+ bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
public:
- PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+ PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG_) :
- ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
+ CurSlots(0), CurBranches(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual bool ShouldPreferAnother(SUnit* SU);
+ virtual unsigned PreEmitNoops(SUnit *SU);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void RecedeCycle();
virtual void Reset();
+ virtual void EmitNoop();
};
/// PPCHazardRecognizer970 - This class defines a finite state automata that
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6ba6af6..3bbc839 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -34,6 +35,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+// FIXME: Remove this once the bug has been fixed!
+cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
+cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -181,6 +186,12 @@ namespace {
private:
SDNode *SelectSETCC(SDNode *N);
+
+ void PeepholePPC64();
+ void PeepholdCROps();
+
+ bool AllUsersSelectZero(SDNode *N);
+ void SwapAllSelectUsers(SDNode *N);
};
}
@@ -261,11 +272,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
@@ -561,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
- Opc = PPC::FCMPUD;
+ Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
@@ -629,7 +640,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
+ bool HasVSX) {
switch (CC) {
case ISD::SETEQ:
case ISD::SETUEQ:
@@ -643,7 +655,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
return PPC::VCMPEQUW;
// v4f32 != v4f32 could be translate to unordered not equal
else if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETLT:
case ISD::SETGT:
@@ -656,7 +670,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETULT:
case ISD::SETUGT:
@@ -671,17 +687,23 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
break;
case ISD::SETOEQ:
if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETOLT:
case ISD::SETOGT:
case ISD::SETOLE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETOGE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGEFP;
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
break;
default:
break;
@@ -692,7 +714,7 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
+static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
switch (VecVT) {
case MVT::v16i8:
return PPC::VCMPEQUB;
@@ -701,13 +723,14 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
case MVT::v4i32:
return PPC::VCMPEQUW;
case MVT::v4f32:
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ case MVT::v2f64:
+ return PPC::XVCMPEQDP;
default:
llvm_unreachable("Invalid integer vector compare condition");
}
}
-
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
@@ -715,7 +738,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- if (isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!PPCSubTarget.useCRBits() &&
+ isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
@@ -796,7 +820,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC);
+ unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX());
switch (CC) {
case ISD::SETEQ:
@@ -807,7 +831,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETONE:
case ISD::SETUNE: {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
}
case ISD::SETLT:
case ISD::SETOLT:
@@ -827,24 +853,31 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR :
+ PPC::VOR,
+ VecVT, VCmpGT, VCmpEQ);
}
}
case ISD::SETLE:
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR :
+ PPC::VOR,
+ VecVT, VCmpLE, VCmpEQ);
}
default:
llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
}
}
+ if (PPCSubTarget.useCRBits())
+ return 0;
+
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
@@ -959,8 +992,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::SETCC:
- return SelectSETCC(N);
+ case ISD::SETCC: {
+ SDNode *SN = SelectSETCC(N);
+ if (SN)
+ return SN;
+ break;
+ }
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
@@ -1122,7 +1159,21 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
MB = 64 - CountTrailingOnes_64(Imm64);
- SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
+ SH = 0;
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
}
// AND X, 0 -> 0, not "rlwinm 32".
@@ -1179,11 +1230,39 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ case PPCISD::ANDIo_1_EQ_BIT:
+ case PPCISD::ANDIo_1_GT_BIT: {
+ if (!ANDIGlueBug)
+ break;
+
+ EVT InVT = N->getOperand(0).getValueType();
+ assert((InVT == MVT::i64 || InVT == MVT::i32) &&
+ "Invalid input type for ANDIo_1_EQ_BIT");
+
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
+ N->getOperand(0),
+ CurDAG->getTargetConstant(1, InVT)), 0);
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
+ PPC::sub_eq : PPC::sub_gt, MVT::i32);
+
+ return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
+ CR0Reg, SRIdxVal,
+ SDValue(AndI.getNode(), 1) /* glue */);
+ }
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
+ // If this is a select of i1 operands, we'll pattern match it.
+ if (PPCSubTarget.useCRBits() &&
+ N->getOperand(0).getValueType() == MVT::i1)
+ break;
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -1202,6 +1281,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+
+ if (N->getValueType(0) == MVT::i1) {
+ // An i1 select is: (c & t) | (!c & f).
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+
+ unsigned SRI;
+ switch (Idx) {
+ default: llvm_unreachable("Invalid CC index");
+ case 0: SRI = PPC::sub_lt; break;
+ case 1: SRI = PPC::sub_gt; break;
+ case 2: SRI = PPC::sub_eq; break;
+ case 3: SRI = PPC::sub_un; break;
+ }
+
+ SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
+
+ SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
+ CCBit, CCBit), 0);
+ SDValue C = Inv ? NotCCBit : CCBit,
+ NotC = Inv ? CCBit : NotCCBit;
+
+ SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ C, N->getOperand(2)), 0);
+ SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ NotC, N->getOperand(3)), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
+ }
+
unsigned BROpc = getPredicateForSetCC(CC);
unsigned SelectCCOp;
@@ -1220,6 +1329,50 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getI32Imm(BROpc) };
return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
}
+ case ISD::VSELECT:
+ if (PPCSubTarget.hasVSX()) {
+ SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops, 3);
+ }
+
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ if (PPCSubTarget.hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
+ Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
+ unsigned DM[2];
+
+ for (int i = 0; i < 2; ++i)
+ if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
+ DM[i] = 0;
+ else
+ DM[i] = 1;
+
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
+
+ if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
+ Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<LoadSDNode>(Op1.getOperand(0))) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
+ SDValue Base, Offset;
+
+ if (LD->isUnindexed() &&
+ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
+ SDValue Chain = LD->getChain();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops, 3);
+ }
+ }
+
+ SDValue Ops[] = { Op1, Op2, DMV };
+ return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops, 3);
+ }
+
+ break;
case PPCISD::BDNZ:
case PPCISD::BDZ: {
bool IsPPC64 = PPCSubTarget.isPPC64();
@@ -1244,8 +1397,30 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ unsigned PCC = getPredicateForSetCC(CC);
+
+ if (N->getOperand(2).getValueType() == MVT::i1) {
+ unsigned Opc;
+ bool Swap;
+ switch (PCC) {
+ default: llvm_unreachable("Unexpected Boolean-operand predicate");
+ case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
+ case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
+ case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
+ case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
+ case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
+ case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
+ }
+
+ SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
+ N->getOperand(Swap ? 3 : 2),
+ N->getOperand(Swap ? 2 : 3)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
+ BitComp, N->getOperand(4), N->getOperand(0));
+ }
+
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
- SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ SDValue Ops[] = { getI32Imm(PCC), CondCode,
N->getOperand(4), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
}
@@ -1288,8 +1463,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalValue *RealGValue =
+ GAlias ? GAlias->getAliasedGlobal() : GValue;
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
assert((GVar || isa<Function>(RealGValue)) &&
"Unexpected global value subclass!");
@@ -1382,7 +1557,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
-/// PostProcessISelDAG - Perform some late peephole optimizations
+/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
@@ -1390,6 +1565,478 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
if (TM.getOptLevel() == CodeGenOpt::None)
return;
+ PeepholePPC64();
+ PeepholdCROps();
+}
+
+// Check if all users of this node will become isel where the second operand
+// is the constant zero. If this is so, and if we can negate the condition,
+// then we can flip the true and false operands. This will allow the zero to
+// be folded with the isel so that we don't need to materialize a register
+// containing zero.
+bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
+ // If we're not using isel, then this does not matter.
+ if (!PPCSubTarget.hasISEL())
+ return false;
+
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (!User->isMachineOpcode())
+ return false;
+ if (User->getMachineOpcode() != PPC::SELECT_I4 &&
+ User->getMachineOpcode() != PPC::SELECT_I8)
+ return false;
+
+ SDNode *Op2 = User->getOperand(2).getNode();
+ if (!Op2->isMachineOpcode())
+ return false;
+
+ if (Op2->getMachineOpcode() != PPC::LI &&
+ Op2->getMachineOpcode() != PPC::LI8)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
+ if (!C)
+ return false;
+
+ if (!C->isNullValue())
+ return false;
+ }
+
+ return true;
+}
+
+void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
+ SmallVector<SDNode *, 4> ToReplace;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
+ User->getMachineOpcode() == PPC::SELECT_I8) &&
+ "Must have all select users");
+ ToReplace.push_back(User);
+ }
+
+ for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
+ UE = ToReplace.end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ SDNode *ResNode =
+ CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
+ User->getValueType(0), User->getOperand(0),
+ User->getOperand(2),
+ User->getOperand(1));
+
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(User->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(User, ResNode);
+ }
+}
+
+void PPCDAGToDAGISel::PeepholdCROps() {
+ bool IsModified;
+ do {
+ IsModified = false;
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode || MachineNode->use_empty())
+ continue;
+ SDNode *ResNode = MachineNode;
+
+ bool Op1Set = false, Op1Unset = false,
+ Op1Not = false,
+ Op2Set = false, Op2Unset = false,
+ Op2Not = false;
+
+ unsigned Opcode = MachineNode->getMachineOpcode();
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ case PPC::CRNAND:
+ case PPC::CROR:
+ case PPC::CRXOR:
+ case PPC::CRNOR:
+ case PPC::CREQV:
+ case PPC::CRANDC:
+ case PPC::CRORC: {
+ SDValue Op = MachineNode->getOperand(1);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op2Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op2Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op2Not = true;
+ }
+ } // fallthrough
+ case PPC::BC:
+ case PPC::BCn:
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC: {
+ SDValue Op = MachineNode->getOperand(0);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op1Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op1Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op1Not = true;
+ }
+ }
+ break;
+ }
+
+ bool SelectSwap = false;
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x & x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set)
+ // 1 & y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // x & 1 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset || Op2Unset)
+ // x & 0 = 0 & y = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // ~x & y = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x & ~y = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // nand(x, x) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Set)
+ // nand(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // nand(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset || Op2Unset)
+ // nand(x, 0) = nand(0, y) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nand(x, ~y) = ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CROR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x | x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set || Op2Set)
+ // x | 1 = 1 | y = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // 0 | y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // x | 0 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRXOR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // xor(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // xor(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // xor(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset)
+ // xor(0, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // xor(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // xor(~x, y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // xor(x, ~y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNOR:
+ if (Op1Set || Op2Set)
+ // nor(1, y) -> 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // nor(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // nor(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // nor(~x, y) = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nor(x, ~y) = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CREQV:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // eqv(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // eqv(1, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // eqv(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // eqv(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // eqv(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // eqv(~x, y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // eqv(x, ~y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRANDC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // andc(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // andc(1, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Unset || Op2Set)
+ // andc(0, y) = andc(x, 1) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Unset)
+ // andc(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // andc(~x, y) = ~(x | y) = nor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // andc(x, ~y) = x & y
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::CRORC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // orc(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set || Op2Unset)
+ // orc(1, y) = orc(x, 0) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Set)
+ // orc(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // orc(0, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Not)
+ // orc(~x, y) = ~(x & y) = nand(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // orc(x, ~y) = x | y
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC:
+ if (Op1Set)
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op1Unset)
+ ResNode = MachineNode->getOperand(2).getNode();
+ else if (Op1Not)
+ ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
+ SDLoc(MachineNode),
+ MachineNode->getValueType(0),
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(2),
+ MachineNode->getOperand(1));
+ break;
+ case PPC::BC:
+ case PPC::BCn:
+ if (Op1Not)
+ ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
+ PPC::BC,
+ SDLoc(MachineNode),
+ MVT::Other,
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1),
+ MachineNode->getOperand(2));
+ // FIXME: Handle Op1Set, Op1Unset here too.
+ break;
+ }
+
+ // If we're inverting this node because it is used only by selects that
+ // we'd like to swap, then swap the selects before the node replacement.
+ if (SelectSwap)
+ SwapAllSelectUsers(MachineNode);
+
+ if (ResNode != MachineNode) {
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(MachineNode->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(MachineNode, ResNode);
+ IsModified = true;
+ }
+ }
+ if (IsModified)
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
+}
+
+void PPCDAGToDAGISel::PeepholePPC64() {
// These optimizations are currently supported only for 64-bit SVR4.
if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
return;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 8da5f05..32ac1dc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -46,6 +46,9 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -94,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ if (Subtarget->useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget->hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
@@ -191,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- // PowerPC does not have Select
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ if (!Subtarget->useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (!Subtarget->useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ if (!Subtarget->useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -445,7 +485,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget->useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -464,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -491,6 +532,83 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget->hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
}
if (Subtarget->has64BitSupport()) {
@@ -522,9 +640,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget->useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget->useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
@@ -545,6 +675,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget->useCRBits())
+ setHasMultipleConditionRegisters();
+
setMinFunctionAlignment(2);
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
@@ -670,6 +805,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
@@ -688,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return MVT::i32;
+ return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -754,8 +890,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
@@ -792,8 +928,8 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -1431,18 +1567,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
- if (!is64bit)
- llvm_unreachable("only local-exec is currently supported for ppc32");
-
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, TPOffsetHi);
+ PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
@@ -1534,6 +1671,27 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
// If we're comparing for equality to zero, expose the fact that this is
// implented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
@@ -1922,7 +2080,7 @@ static const uint16_t *GetFPR() {
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
- unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -2020,6 +2178,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
@@ -2027,7 +2186,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = &PPC::F8RCRegClass;
+ if (PPCSubTarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
@@ -2035,18 +2197,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
isImmutable);
@@ -2182,7 +2352,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
// Set the size that is at least reserved in caller of this function. Tail
@@ -2246,6 +2416,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const uint16_t VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
@@ -2265,7 +2439,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
@@ -2275,7 +2449,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
+ ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
@@ -2333,7 +2508,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
@@ -2345,7 +2520,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg),
false, false, 0);
}
@@ -2369,7 +2544,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2384,13 +2559,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2416,7 +2592,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
@@ -2431,10 +2609,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
@@ -2581,6 +2763,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch(ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::f32:
VecArgOffset += 4;
@@ -2665,8 +2848,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg,
- CurArgOffset),
+ MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2690,7 +2872,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, ArgOffset),
+ MachinePointerInfo(FuncArg, j),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -2705,11 +2887,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
++GPR_idx;
} else {
needsLoad = true;
@@ -2725,7 +2912,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2888,7 +3075,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
+ ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
@@ -2968,7 +3156,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Flags.isByVal()) return false;
}
- // Non PIC/GOT tail calls are supported.
+ // Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
@@ -3706,6 +3894,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -3863,6 +4054,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const uint16_t VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -3884,7 +4080,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Promote integers to 64-bit values.
- if (Arg.getValueType() == MVT::i32) {
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
@@ -4008,6 +4204,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
@@ -4068,6 +4265,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
@@ -4091,7 +4290,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -4111,7 +4316,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// stack space allocated at the end.
if (VR_idx != NumVRs) {
// Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
@@ -4339,9 +4549,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -4693,6 +4907,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4859,6 +5122,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, Op.getValueType()),
+ DAG.getConstantFP(0.0, Op.getValueType()));
+
assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
@@ -5686,6 +5954,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Flags;
}
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
+ // instructions), but for smaller types, we need to first extend up to v2i32
+ // before doing going farther.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -5792,6 +6084,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
@@ -5810,6 +6105,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
@@ -5915,8 +6211,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -5984,8 +6279,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6137,7 +6431,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
@@ -6357,9 +6651,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
- Cond.push_back(MI->getOperand(4));
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
@@ -6371,9 +6671,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -6387,23 +6690,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
// copy0MBB:
// %FalseValue = ...
@@ -6505,8 +6816,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -6576,8 +6886,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6726,6 +7035,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
+
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6747,7 +7077,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -6809,7 +7140,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -6980,6 +7312,536 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(PPCSubTarget.useCRBits() &&
+ "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+ // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+ // high bits (all except for the first) don't matter the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+ // This is neither a signed nor an unsigned comparison, just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+ // We now know that the higher-order bits are irrelevant, we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1,
+ Ops.data(), Ops.size()));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+ // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+ PPCSubTarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 &&
+ PPCSubTarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If all of the inputs are not already sign/zero extended, then
+ // we'll still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0),
+ Ops.data(), Ops.size()));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+ N->getOperand(0), ShiftCst), ShiftCst);
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -7006,6 +7868,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
@@ -7204,7 +8074,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// you might suspect (sizeof(vector) bytes after the last requested
// load), but rather sizeof(vector) - 1 bytes after the last
// requested vector. The point of this is to avoid a page fault if the
- // base address happend to be aligned. This works because if the base
+ // base address happened to be aligned. This works because if the base
// address is aligned, then adding less than a full vector length will
// cause the last vector in the sequence to be (re)loaded. Otherwise,
// the next vector will be fetched as you might suspect was necessary.
@@ -7421,6 +8291,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
// that we don't have to do a MFOCRF: instead, branch directly on CR6. This
@@ -7585,6 +8474,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
// suboptimal.
return C_Memory;
}
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -7602,7 +8496,18 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
if (CallOperandVal == NULL)
return CW_Default;
Type *type = CallOperandVal->getType();
+
// Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7658,6 +8563,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
+ } else if (Constraint == "wc") { // an individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
@@ -7793,6 +8705,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7881,6 +8796,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -7894,8 +8810,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.getSimpleVT().isVector())
- return false;
+ if (VT.getSimpleVT().isVector()) {
+ if (PPCSubTarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+ } else {
+ return false;
+ }
+ }
if (VT == MVT::ppcf128)
return false;
@@ -7923,6 +8845,15 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT , unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index df3af35..da6d4dc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -19,8 +19,8 @@
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -121,6 +121,12 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
+
// EH_SJLJ_SETJMP - SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
@@ -177,6 +183,10 @@ namespace llvm {
CR6SET,
CR6UNSET,
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// on PPC32.
+ PPC32_GOT,
+
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
@@ -457,7 +467,9 @@ namespace llvm {
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ bool *Fast = 0) const;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -465,6 +477,11 @@ namespace llvm {
/// expanded to fmul + fadd.
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
+ // Should we expand the build vector with shuffles?
+ virtual bool
+ shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const;
+
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
@@ -509,6 +526,9 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -520,6 +540,7 @@ namespace llvm {
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -625,6 +646,8 @@ namespace llvm {
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 46db4fe..b71c09e 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -19,11 +19,13 @@ def s16imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
}
def s17imm64 : Operand<i64> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -32,14 +34,11 @@ def s17imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
}
-def PPCTLSRegOperand : AsmOperandClass {
- let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
- let RenderMethod = "addTLSRegOperands";
-}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
let ParserMatchClass = PPCTLSRegOperand;
@@ -80,15 +79,22 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+ def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>,
+ def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -107,9 +113,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
- "bdzlr", BrB, []>;
+ "bdzlr", IIC_BrB, []>;
def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
- "bdnzlr", BrB, []>;
+ "bdnzlr", IIC_BrB, []>;
}
}
@@ -119,41 +125,58 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
- "bl $func", BrB, []>;
+ "bl $func", IIC_BrB, []>;
def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>;
}
let Uses = [RM], isCodeGenOnly = 1 in {
def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
- "bl $func\n\tnop", BrB, []>;
+ "bl $func\n\tnop", IIC_BrB, []>;
def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins tlscall:$func),
- "bl $func\n\tnop", BrB, []>;
+ "bl $func\n\tnop", IIC_BrB, []>;
def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins abscalltarget:$func),
- "bla $func\n\tnop", BrB,
+ "bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", BrB, [(PPCbctrl)]>,
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>,
- Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
}
}
} // Interpretation64Bit
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
+def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
// Calls
def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
(BL8 tglobaladdr:$dst)>;
@@ -199,16 +222,16 @@ let usesCustomInserter = 1 in {
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", LdStLDARX,
+ "ldarx $rD, $ptr", IIC_LdStLDARX,
[(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", LdStSTDCX,
+ "stdcx. $rS, $dst", IIC_LdStSTDCX,
[(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi8 :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -225,28 +248,23 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
-let isCodeGenOnly = 1 in {
-
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
-def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
Requires<[In64BitMode]>;
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[]>;
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", BrB,
+ "ba $dst", IIC_BrB,
[]>;
-
-}
} // Interpretation64Bit
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
@@ -260,23 +278,23 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in {
def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
- "mtocrf $FXM, $ST", BrMCRX>,
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
- "mtcrf $FXM, $rS", BrMCRX>,
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
- "mfcr $rT", SprMFCR>,
+ "mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // neverHasSideEffects = 1
@@ -298,24 +316,24 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
let Uses = [CTR8] in {
def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
- "mfctr $rT", SprMFSPR>,
+ "mfctr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in {
+let hasSideEffects = 1, Defs = [CTR8] in {
let Pattern = [(int_ppc_mtctr i64:$rS)] in
def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
- "mfspr $rT, 268", SprMFTB>,
+ "mfspr $rT, 268", IIC_SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
// Note that encoding mftb using mfspr is now the preferred form,
// and has been since at least ISA v2.03. The mftb instruction has
@@ -329,12 +347,12 @@ def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#D
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
- "mtlr $rS", SprMTSPR>,
+ "mtlr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR8] in {
def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
- "mflr $rT", SprMFSPR>,
+ "mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
} // Interpretation64Bit
@@ -346,213 +364,236 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
let Interpretation64Bit = 1 in {
let neverHasSideEffects = 1 in {
+let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
- "li $rD, $imm", IntSimple,
+ "li $rD, $imm", IIC_IntSimple,
[(set i64:$rD, imm64SExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
- "lis $rD, $imm", IntSimple,
+ "lis $rD, $imm", IIC_IntSimple,
[(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
+let isCommutable = 1 in {
defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nand", "$rA, $rS, $rB", IntSimple,
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "and", "$rA, $rS, $rB", IntSimple,
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+} // isCommutable
defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "andc", "$rA, $rS, $rB", IntSimple,
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "or", "$rA, $rS, $rB", IntSimple,
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (or i64:$rS, i64:$rB))]>;
defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nor", "$rA, $rS, $rB", IntSimple,
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+} // isCommutable
defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "orc", "$rA, $rS, $rB", IntSimple,
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "eqv", "$rA, $rS, $rB", IntSimple,
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "xor", "$rA, $rS, $rB", IntSimple,
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+} // let isCommutable = 1
// Logical ops with immediate.
let Defs = [CR0] in {
-def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "andi. $dst, $src1, $src2", IntGeneral,
+def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
-def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "andis. $dst, $src1, $src2", IntGeneral,
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
}
-def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntSimple,
+def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
-def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntSimple,
+def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntSimple,
+def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
-def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntSimple,
+def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+let isCommutable = 1 in
defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "add", "$rT, $rA, $rB", IntSimple,
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB", IntSimple,
+ "add $rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "addc", "$rT, $rA, $rB", IntGeneral,
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
+
let Defs = [CARRY] in
def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "addic $rD, $rA, $imm", IntGeneral,
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
[(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
+ "addi $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
- "addis $rD, $rA, $imm", IntSimple,
+ "addis $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "subfic $rD, $rA, $imm", IntGeneral,
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
[(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfc", "$rT, $rA, $rB", IntGeneral,
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subf", "$rT, $rA, $rB", IntGeneral,
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "neg", "$rT, $rA", IntSimple,
+ "neg", "$rT, $rA", IIC_IntSimple,
[(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY] in {
+let isCommutable = 1 in
defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "adde", "$rT, $rA, $rB", IntGeneral,
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addme", "$rT, $rA", IntGeneral,
+ "addme", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, -1))]>;
defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addze", "$rT, $rA", IntGeneral,
+ "addze", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, 0))]>;
defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfe", "$rT, $rA, $rB", IntGeneral,
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfme", "$rT, $rA", IntGeneral,
+ "subfme", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (sube -1, i64:$rA))]>;
defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfze", "$rT, $rA", IntGeneral,
+ "subfze", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (sube 0, i64:$rA))]>;
}
+} // isCodeGenOnly
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let isAsmParserOnly = 1 in
+def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple, []>;
+let isCommutable = 1 in {
defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhd", "$rT, $rA, $rB", IntMulHW,
+ "mulhd", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhdu", "$rT, $rA, $rB", IntMulHWU,
+ "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+} // isCommutable
}
} // Interpretation64Bit
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+ "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
- def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm),
- "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
- def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+ "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm),
+ "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "cmpldi $dst, $src1, $src2",
+ IIC_IntCompare>, isPPC64;
}
let neverHasSideEffects = 1 in {
defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "sld", "$rA, $rS, $rB", IntRotateD,
+ "sld", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srd", "$rA, $rS, $rB", IntRotateD,
+ "srd", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srad", "$rA, $rS, $rB", IntRotateD,
+ "srad", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsb", "$rA, $rS", IntSimple,
+ "extsb", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsh", "$rA, $rS", IntSimple,
+ "extsh", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
// For fast-isel:
let isCodeGenOnly = 1 in {
def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
- "extsb $rA, $rS", IntSimple, []>, isPPC64;
+ "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64;
def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
- "extsh $rA, $rS", IntSimple, []>, isPPC64;
+ "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64;
} // isCodeGenOnly for fast-isel
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsw", "$rA, $rS", IntSimple,
+ "extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
- "extsw", "$rA, $rS", IntSimple,
+ "extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext i32:$rS))]>, isPPC64;
defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
- "sradi", "$rA, $rS, $SH", IntRotateDI,
+ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
- "cntlzd", "$rA, $rS", IntGeneral,
+ "cntlzd", "$rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd $rA, $rS", IntGeneral,
+ "popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw $rA, $rS", IntGeneral,
+ "popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IntDivD,
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IntDivD,
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulld", "$rT, $rA, $rB", IntMulHD,
+ "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
[(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "mulli $rD, $rA, $imm", IntMulLI,
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
[(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
}
@@ -560,7 +601,7 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
(ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
@@ -568,43 +609,53 @@ defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
// Rotate instructions.
defm RLDCL : MDSForm_1r<30, 8,
(outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
+ "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDCR : MDSForm_1r<30, 9,
(outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD,
+ "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
(outs g8rc:$rA),
(ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
defm RLDIC : MDForm_1r<30, 2,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
(ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>;
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+}
+
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
(outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
[]>;
} // Interpretation64Bit
} // neverHasSideEffects = 1
@@ -618,111 +669,111 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
- "lha $rD, $src", LdStLHA,
+ "lha $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
- "lwa $rD, $src", LdStLWA,
+ "lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
(aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
- "lhax $rD, $src", LdStLHA,
+ "lhax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
- "lwax $rD, $src", LdStLHA,
+ "lwax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1 in {
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
- "lwa $rD, $src", LdStLWA, []>, isPPC64,
+ "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked;
def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
- "lwax $rD, $src", LdStLHA, []>, isPPC64,
+ "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64,
PPC970_DGroup_Cracked;
} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memri:$addr),
- "lhau $rD, $addr", LdStLHAU,
+ "lhau $rD, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLHAU,
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwaux $rD, $addr", LdStLHAU,
+ "lwaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
}
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStLoad,
+ "lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStLoad,
+ "lhz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStLoad,
+ "lwz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStLoad,
+ "lbzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStLoad,
+ "lhzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStLoad,
+ "lwzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoadUpd,
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoadUpd,
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoadUpd,
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoadUpd,
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoadUpd,
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoadUpd,
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -733,7 +784,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
- "ld $rD, $src", LdStLD,
+ "ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
// The following three definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
@@ -754,30 +805,30 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
- "ld 2, 8($reg)", LdStLD,
+ "ld 2, 8($reg)", IIC_LdStLD,
[(PPCload_toc i64:$reg)]>, isPPC64;
let RST = 2, DS = 10, RA = 1 in
def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
- "ld 2, 40(1)", LdStLD,
+ "ld 2, 40(1)", IIC_LdStLD,
[(PPCtoc_restore)]>, isPPC64;
}
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
- "ldx $rD, $src", LdStLD,
+ "ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
- "ldbrx $rD, $src", LdStLoad,
+ "ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
let mayLoad = 1, neverHasSideEffects = 1 in {
def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
- "ldu $rD, $addr", LdStLDU,
+ "ldu $rD, $addr", IIC_LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "ldux $rD, $addr", LdStLDU,
+ "ldux $rD, $addr", IIC_LdStLDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -860,78 +911,79 @@ def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
isPPC64;
let PPC970_Unit = 2 in {
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
- "stb $rS, $src", LdStStore,
+ "stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
- "sth $rS, $src", LdStStore,
+ "sth $rS, $src", IIC_LdStStore,
[(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
- "stw $rS, $src", LdStStore,
+ "stw $rS, $src", IIC_LdStStore,
[(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStStore,
+ "stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStStore,
+ "sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStStore,
+ "stwx $rS, $dst", IIC_LdStStore,
[(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
- "std $rS, $dst", LdStSTD,
+ "std $rS, $dst", IIC_LdStSTD,
[(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdx $rS, $dst", LdStSTD,
+ "stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdbrx $rS, $dst", LdStStore,
+ "stdbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1 in {
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", LdStSTDU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
+ "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", LdStSTDU, []>,
+ "stdux $rS, $dst", IIC_LdStSTDUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
}
@@ -966,29 +1018,29 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
let PPC970_Unit = 3, neverHasSideEffects = 1,
Uses = [RM] in { // FPU Operations.
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfid", "$frD, $frB", FPGeneral,
+ "fcfid", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctid", "$frD, $frB", FPGeneral,
+ "fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctidz", "$frD, $frB", FPGeneral,
+ "fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfidu", "$frD, $frB", FPGeneral,
+ "fcfidu", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfids", "$frD, $frB", FPGeneral,
+ "fcfids", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfidus", "$frD, $frB", FPGeneral,
+ "fcfidus", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiduz", "$frD, $frB", FPGeneral,
+ "fctiduz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwuz", "$frD, $frB", FPGeneral,
+ "fctiwuz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
@@ -1006,6 +1058,14 @@ def : Pat<(i64 (anyext i32:$in)),
def : Pat<(i32 (trunc i64:$in)),
(EXTRACT_SUBREG $in, sub_32)>;
+// Implement the 'not' operation with the NOR instruction.
+// (we could use the default xori pattern, but nor has lower latency on some
+// cores (such as the A2)).
+def i64not : OutPatFrag<(ops node:$in),
+ (NOR8 $in, $in)>;
+def : Pat<(not i64:$in),
+ (i64not $in)>;
+
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
(LBZ8 iaddr:$src)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index a55abe3..2fd4a3e 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -164,7 +164,7 @@ def vecspltisw : PatLeaf<(build_vector), [{
// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
@@ -172,7 +172,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
@@ -180,14 +180,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set OutTy:$vD,
(IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
@@ -195,7 +195,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
@@ -203,13 +203,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
: VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set v4f32:$vD, (IntID v4f32:$vB))]>;
// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
@@ -217,7 +217,7 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
//===----------------------------------------------------------------------===//
@@ -229,116 +229,118 @@ let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStLoad /*FIXME*/, []>,
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStLoad /*FIXME*/, []>,
+ "dssall", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
- "mfvscr $vD", LdStStore,
+ "mfvscr $vD", IIC_LdStStore,
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
- "mtvscr $vB", LdStLoad,
+ "mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
- "lvebx $vD, $src", LdStLoad,
+ "lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src),
- "lvehx $vD, $src", LdStLoad,
+ "lvehx $vD, $src", IIC_LdStLoad,
[(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src),
- "lvewx $vD, $src", LdStLoad,
+ "lvewx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
- "lvx $vD, $src", LdStLoad,
+ "lvx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
- "lvxl $vD, $src", LdStLoad,
+ "lvxl $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src),
- "lvsl $vD, $src", LdStLoad,
+ "lvsl $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
- "lvsr $vD, $src", LdStLoad,
+ "lvsr $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvebx $rS, $dst", LdStStore,
+ "stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvehx $rS, $dst", LdStStore,
+ "stvehx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvewx $rS, $dst", LdStStore,
+ "stvewx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvx $rS, $dst", LdStStore,
+ "stvx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvxl $rS, $dst", LdStStore,
+ "stvxl $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
+let isCommutable = 1 in {
def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD,
(fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
// FIXME: The fma+fneg pattern won't match because fneg is not legal.
def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
- (fneg v4f32:$vB))))]>;
+ (fneg v4f32:$vB))))]>;
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
v8i16>;
def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+} // isCommutable
def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
v4i32, v4i32, v16i8>;
@@ -346,23 +348,24 @@ def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
- "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
[(set v16i8:$vD,
(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
+let isCommutable = 1 in {
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddfp $vD, $vA, $vB", VecFP,
+ "vaddfp $vD, $vA, $vB", IIC_VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddubm $vD, $vA, $vB", VecGeneral,
+ "vaddubm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduhm $vD, $vA, $vB", VecGeneral,
+ "vadduhm $vD, $vA, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduwm $vD, $vA, $vB", VecGeneral,
+ "vadduwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
@@ -372,30 +375,31 @@ def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
-
-
+} // isCommutable
+
+let isCommutable = 1 in
def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vand $vD, $vA, $vB", VecFP,
+ "vand $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vandc $vD, $vA, $vB", VecFP,
+ "vandc $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (and v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfsx $vD, $vB, $UIMM", VecFP,
+ "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfux $vD, $vB, $UIMM", VecFP,
+ "vcfux $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctsxs $vD, $vB, $UIMM", VecFP,
+ "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctuxs $vD, $vB, $UIMM", VecFP,
+ "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
@@ -404,25 +408,26 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
// to floating-point (sint_to_fp/uint_to_fp) conversions.
let isCodeGenOnly = 1, VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfsx $vD, $vB, 0", VecFP,
+ "vcfsx $vD, $vB, 0", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctuxs $vD, $vB, 0", VecFP,
+ "vctuxs $vD, $vB, 0", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfux $vD, $vB, 0", VecFP,
+ "vcfux $vD, $vB, 0", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctsxs $vD, $vB, 0", VecFP,
+ "vctsxs $vD, $vB, 0", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+let isCommutable = 1 in {
def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
@@ -444,24 +449,25 @@ def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
+} // isCommutable
def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghb $vD, $vA, $vB", VecFP,
+ "vmrghb $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghh $vD, $vA, $vB", VecFP,
+ "vmrghh $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghw $vD, $vA, $vB", VecFP,
+ "vmrghw $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglb $vD, $vA, $vB", VecFP,
+ "vmrglb $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglh $vD, $vA, $vB", VecFP,
+ "vmrglh $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglw $vD, $vA, $vB", VecFP,
+ "vmrglw $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
@@ -477,6 +483,7 @@ def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
v4i32, v8i16, v4i32>;
+let isCommutable = 1 in {
def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
v8i16, v16i8>;
def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
@@ -493,6 +500,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
v8i16, v16i8>;
def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
v4i32, v8i16>;
+} // isCommutable
def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
@@ -504,16 +512,16 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubfp $vD, $vA, $vB", VecGeneral,
+ "vsubfp $vD, $vA, $vB", IIC_VecGeneral,
[(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsububm $vD, $vA, $vB", VecGeneral,
+ "vsububm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuhm $vD, $vA, $vB", VecGeneral,
+ "vsubuhm $vD, $vA, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuwm $vD, $vA, $vB", VecGeneral,
+ "vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
@@ -534,15 +542,17 @@ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vnor $vD, $vA, $vB", VecFP,
+ "vnor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
v4i32:$vB)))]>;
+let isCommutable = 1 in {
def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vor $vD, $vA, $vB", VecFP,
+ "vor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vxor $vD, $vA, $vB", VecFP,
+ "vxor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+} // isCommutable
def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
@@ -556,15 +566,15 @@ def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltb $vD, $vB, $UIMM", VecPerm,
+ "vspltb $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vsplth $vD, $vB, $UIMM", VecPerm,
+ "vsplth $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltw $vD, $vB, $UIMM", VecPerm,
+ "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
@@ -580,13 +590,13 @@ def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisb $vD, $SIMM", VecPerm,
+ "vspltisb $vD, $SIMM", IIC_VecPerm,
[(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltish $vD, $SIMM", VecPerm,
+ "vspltish $vD, $SIMM", IIC_VecPerm,
[(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisw $vD, $SIMM", VecPerm,
+ "vspltisw $vD, $SIMM", IIC_VecPerm,
[(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
@@ -601,13 +611,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuhum $vD, $vA, $vB", VecFP,
+ "vpkuhum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuwum $vD, $vA, $vB", VecFP,
+ "vpkuwum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
@@ -631,10 +641,12 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
@@ -676,24 +688,24 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllZerosV))]>;
def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v8i16:$vD, (v8i16 immAllZerosV))]>;
def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v4i32:$vD, (v4i32 immAllZerosV))]>;
let IMM=-1 in {
def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllOnesV))]>;
def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v8i16:$vD, (v8i16 immAllOnesV))]>;
def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
}
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 29233d4..7fed2c6 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -14,6 +14,8 @@
class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
: Instruction {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+ let Size = 4;
bit PPC64 = 0; // Default value, override with isPPC64
@@ -67,6 +69,8 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: Instruction {
field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ let Size = 8;
bit PPC64 = 0; // Default value, override with isPPC64
@@ -109,7 +113,7 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
bits<3> CR;
bits<14> BD;
@@ -135,7 +139,7 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<14> BD;
let Inst{6-10} = bo;
@@ -147,7 +151,7 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
class BForm_3<bits<6> opcode, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<5> BO;
bits<5> BI;
bits<14> BD;
@@ -159,6 +163,19 @@ class BForm_3<bits<6> opcode, bit aa, bit lk,
let Inst{31} = lk;
}
+class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
// 1.7.3 SC-Form
class SCForm<bits<6> opcode, bits<1> xo,
dag OOL, dag IOL, string asmstr, InstrItinClass itin,
@@ -258,6 +275,15 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0;
}
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = R;
+ let B = R;
+ let C = 0;
+}
+
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -567,6 +593,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0;
+ let Inst{14-15} = D;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0;
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC;
+ let Inst{22-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo;
+ let Inst{28} = XC{5};
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,6 +857,12 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
let BH = 0;
}
+class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BH = 0;
+}
class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 315ad04..939bbdc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,16 +18,19 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,6 +48,13 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
+cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
+cl::Hidden);
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -61,7 +71,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II = TM->getInstrItineraryData();
- return new PPCScoreboardHazardRecognizer(II, DAG);
+ return new ScoreboardHazardRecognizer(II, DAG);
}
return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
@@ -74,6 +84,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const ScheduleDAG *DAG) const {
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_PWR7)
+ return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
@@ -82,7 +95,57 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
return new PPCHazardRecognizer970(TM);
}
- return new PPCScoreboardHazardRecognizer(II, DAG);
+ return new ScoreboardHazardRecognizer(II, DAG);
+}
+
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+ UseMI, UseIdx);
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool IsRegCR;
+ if (TRI->isVirtualRegister(Reg)) {
+ const MachineRegisterInfo *MRI =
+ &DefMI->getParent()->getParent()->getRegInfo();
+ IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+ } else {
+ IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+ PPC::CRBITRCRegClass.contains(Reg);
+ }
+
+ if (UseMI->isBranch() && IsRegCR) {
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+
+ // On some cores, there is an additional delay between writing to a condition
+ // register, and using it from a branch.
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ switch (Directive) {
+ default: break;
+ case PPC::DIR_7400:
+ case PPC::DIR_750:
+ case PPC::DIR_970:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ Latency += 2;
+ break;
+ }
+ }
+
+ return Latency;
}
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
@@ -110,7 +173,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::LFS:
case PPC::LFD:
case PPC::RESTORE_CR:
+ case PPC::RESTORE_CRBIT:
case PPC::LVX:
+ case PPC::LXVD2X:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -134,7 +199,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::STFS:
case PPC::STFD:
case PPC::SPILL_CR:
+ case PPC::SPILL_CRBIT:
case PPC::STVX:
+ case PPC::STXVD2X:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -156,7 +223,9 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
- MI->getOpcode() != PPC::RLWIMIo)
+ MI->getOpcode() != PPC::RLWIMIo &&
+ MI->getOpcode() != PPC::RLWIMI8 &&
+ MI->getOpcode() != PPC::RLWIMI8o)
return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
@@ -174,6 +243,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
+ unsigned SubReg1 = MI->getOperand(1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(2).getSubReg();
bool Reg1IsKill = MI->getOperand(1).isKill();
bool Reg2IsKill = MI->getOperand(2).isKill();
bool ChangeReg0 = false;
@@ -183,6 +254,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Must be two address instruction!
assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
+ assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
Reg2IsKill = false;
ChangeReg0 = true;
}
@@ -203,10 +275,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
.addImm((MB-1) & 31);
}
- if (ChangeReg0)
+ if (ChangeReg0) {
MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(0).setSubReg(SubReg2);
+ }
MI->getOperand(2).setReg(Reg1);
MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setSubReg(SubReg1);
+ MI->getOperand(1).setSubReg(SubReg2);
MI->getOperand(2).setIsKill(Reg1IsKill);
MI->getOperand(1).setIsKill(Reg2IsKill);
@@ -216,13 +292,37 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
+bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ // For VSX A-Type FMA instructions, it is the first two operands that can be
+ // commuted, however, because the non-encoded tied input operand is listed
+ // first, the operands to swap are actually the second and third.
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+
+ SrcOpIdx1 = 2;
+ SrcOpIdx2 = 3;
+ return true;
+}
+
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
+ // This function is used for scheduling, and the nop wanted here is the type
+ // that terminates dispatch groups on the POWER cores.
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Opcode;
+ switch (Directive) {
+ default: Opcode = PPC::NOP; break;
+ case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+ case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ }
+
DebugLoc DL;
- BuildMI(MBB, MI, DL, get(PPC::NOP));
+ BuildMI(MBB, MI, DL, get(Opcode));
}
-
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -263,6 +363,22 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
+ } else if (LastInst->getOpcode() == PPC::BC) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCn) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
} else if (LastInst->getOpcode() == PPC::BDNZ8 ||
LastInst->getOpcode() == PPC::BDNZ) {
if (!LastInst->getOperand(0).isMBB())
@@ -310,6 +426,26 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BCn &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
} else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
SecondLastInst->getOpcode() == PPC::BDNZ) &&
LastInst->getOpcode() == PPC::B) {
@@ -367,6 +503,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
--I;
}
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 0;
@@ -379,6 +516,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin()) return 1;
--I;
if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 1;
@@ -408,9 +546,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
return 1;
}
@@ -419,9 +561,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -506,6 +652,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
+ case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
@@ -534,6 +682,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSLRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSHRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSLRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSHRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -545,6 +734,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // There are two different ways this can be done:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ // We'll always use xxlor here, because in practically all cases where
+ // copies are generated, they are close enough to some use that the
+ // lower-latency form is preferable.
+ Opc = PPC::XXLOR;
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::XXLORf;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -570,12 +771,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// update isStoreToStackSlot.
DebugLoc DL;
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -597,45 +800,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
- // backend currently only uses CR1EQ as an individual bit, this should
- // not cause any bug. If we need other uses of CR bits, the following
- // code may be invalid.
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
-
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -695,10 +882,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// Note: If additional load instructions are added here,
// update isLoadFromStackSlot.
- if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
@@ -713,40 +902,22 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
-
- unsigned Reg = 0;
- if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
- DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
- DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
- DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
- DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
- DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
- DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
- DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
- DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
-
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CRBIT), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -933,9 +1104,17 @@ bool PPCInstrInfo::PredicateInstruction(
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
- } else {
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI->setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(PPC::BCLRn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else {
+ MI->setDesc(get(PPC::BCCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
}
@@ -947,6 +1126,22 @@ bool PPCInstrInfo::PredicateInstruction(
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
MI->RemoveOperand(0);
@@ -966,8 +1161,23 @@ bool PPCInstrInfo::PredicateInstruction(
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
- MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
- (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+
+ if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+ (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) :
+ (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) :
+ (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
@@ -1152,8 +1362,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
if (equalityOnly) {
// We need to check the uses of the condition register in order to reject
// non-equality comparisons.
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
- IE = MRI->use_end(); I != IE; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg),
+ IE = MRI->use_instr_end(); I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
@@ -1175,8 +1385,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
I != EL; ++I) {
bool FoundUse = false;
- for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
- JE = MRI->use_end(); J != JE; ++J)
+ for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg),
+ JE = MRI->use_instr_end(); J != JE; ++J)
if (&*J == &*I) {
FoundUse = true;
break;
@@ -1285,15 +1495,16 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
}
if (ShouldSwap)
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
- IE = MRI->use_end(); I != IE; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
assert((!equalityOnly ||
Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
"Invalid predicate for equality-only optimization");
- PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
PPC::getSwappedPredicate(Pred)));
} else if (UseMI->getOpcode() == PPC::ISEL ||
UseMI->getOpcode() == PPC::ISEL8) {
@@ -1306,7 +1517,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
else if (NewSubReg == PPC::sub_gt)
NewSubReg = PPC::sub_lt;
- SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+ SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
NewSubReg));
} else // We need to abort on a user we don't understand.
return false;
@@ -1318,7 +1529,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
CmpInstr->eraseFromParent();
MachineBasicBlock::iterator MII = MI;
- BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+ BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
get(TargetOpcode::COPY), CRReg)
.addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
@@ -1363,26 +1574,497 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
/// instruction may be. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case PPC::INLINEASM: { // Inline Asm: Variable size.
+ unsigned Opcode = MI->getOpcode();
+
+ if (Opcode == PPC::INLINEASM) {
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
- }
- case PPC::PROLOG_LABEL:
- case PPC::EH_LABEL:
- case PPC::GC_LABEL:
- case PPC::DBG_VALUE:
- return 0;
- case PPC::BL8_NOP:
- case PPC::BLA8_NOP:
- return 8;
- default:
- return 4; // PowerPC instructions are all 4 bytes
+ } else {
+ const MCInstrDesc &Desc = get(Opcode);
+ return Desc.getSize();
}
}
#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace {
+ // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations.
+ bool OtherUsers = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J)
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+
+ if (OtherUsers)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // In order to replace the addend here with the source of the copy,
+ // it must still be live here.
+ if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
+ continue;
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned AddReg = AddendMI->getOperand(1).getReg();
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.setReg(KilledProdReg);
+ UseMO.setSubReg(KilledProdSubReg);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
+
+namespace llvm {
+ void initializePPCVSXCopyCleanupPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
+ // registers (mostly because the ABI code still places all values into the
+ // "traditional" floating-point and vector registers). Remove them here.
+ struct PPCVSXCopyCleanup : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ SmallVector<MachineInstr *, 4> ToDelete;
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (MI->getOpcode() == PPC::XXLOR &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
+ ToDelete.push_back(MI);
+ }
+
+ if (!ToDelete.empty())
+ Changed = true;
+
+ for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
+ DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
+ ToDelete[i]->eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
+ "PowerPC VSX Copy Cleanup", false, false)
+
+char PPCVSXCopyCleanup::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
+
+#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
@@ -1424,7 +2106,7 @@ protected:
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
- // branch with a blr.
+ // branch with a blr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -1436,7 +2118,7 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
.addImm(J->getOperand(0).getImm())
.addReg(J->getOperand(1).getReg());
MachineBasicBlock::iterator K = J--;
@@ -1445,6 +2127,20 @@ protected:
++NumBCLR;
continue;
}
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ?
+ PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg());
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
} else if (J->isBranch()) {
if (J->isIndirectBranch()) {
if (ReturnMBB.hasAddressTaken())
@@ -1466,7 +2162,7 @@ protected:
if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
OtherReference = true;
- // Predecessors are stored in a vector and can't be removed here.
+ // Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
PredToRemove.push_back(*PI);
}
@@ -1509,7 +2205,7 @@ public:
return Changed;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
@@ -1529,4 +2225,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
char PPCEarlyReturn::ID = 0;
FunctionPass*
llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
-
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index f140c41..3c8117c 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -95,6 +95,18 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
+ UseNode, UseIdx);
+ }
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const;
@@ -107,6 +119,9 @@ public:
// rotate amt is zero. We also have to munge the immediates a bit.
virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+ virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 2bd3aad..1d984ab 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -99,6 +99,8 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
+
def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
[SDNPMayLoad]>;
@@ -288,6 +290,12 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit
+ // zero extended field.
+ return isUInt<32>(Imm);
+}]>;
+
// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
// restricted memrix (4-aligned) constants are alignment sensitive. If these
// offsets are hidden behind TOC entries than the values of the lower-order
@@ -404,6 +412,14 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -411,6 +427,7 @@ def PPCS5ImmAsmOperand : AsmOperandClass {
def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
let ParserMatchClass = PPCS5ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<5>";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
@@ -419,6 +436,7 @@ def PPCU5ImmAsmOperand : AsmOperandClass {
def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
let ParserMatchClass = PPCU5ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
@@ -427,6 +445,7 @@ def PPCU6ImmAsmOperand : AsmOperandClass {
def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
let ParserMatchClass = PPCU6ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<6>";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
@@ -436,6 +455,7 @@ def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
@@ -445,6 +465,7 @@ def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
@@ -457,6 +478,7 @@ def s17imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def PPCDirectBrAsmOperand : AsmOperandClass {
let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
@@ -502,6 +524,7 @@ def PPCCRBitMaskOperand : AsmOperandClass {
def crbitm: Operand<i8> {
let PrintMethod = "printcrbitm";
let EncoderMethod = "get_crbitm_encoding";
+ let DecoderMethod = "decodeCRBitMOperand";
let ParserMatchClass = PPCCRBitMaskOperand;
}
// Address operands
@@ -539,6 +562,7 @@ def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
+ let DecoderMethod = "decodeMemRIOperands";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
@@ -548,6 +572,7 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
+ let DecoderMethod = "decodeMemRIXOperands";
}
// A single-register address. This is used with the SjLj
@@ -555,6 +580,14 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc:$ptrreg);
}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
@@ -580,6 +613,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -613,20 +647,6 @@ multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
-multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmbase, string asmstr, InstrItinClass itin,
- list<dag> pattern> {
- let BaseName = asmbase in {
- def NAME : XForm_10<opcode, xo, OOL, IOL,
- !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
- pattern>, RecFormRel;
- let Defs = [CR0] in
- def o : XForm_10<opcode, xo, OOL, IOL,
- !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
- }
-}
-
multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -887,30 +907,63 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
+ [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
+ def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
+ [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
+ def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ f4rc:$T, f4rc:$F), "#SELECT_F4",
+ [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
+ def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ f8rc:$T, f8rc:$F), "#SELECT_F8",
+ [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ vrrc:$T, vrrc:$F), "#SELECT_VRRC",
+ [(set v4i32:$dst,
+ (select i1:$cond, v4i32:$T, v4i32:$F))]>;
}
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-let mayStore = 1 in
+let mayStore = 1 in {
def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
+def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+ "#SPILL_CRBIT", []>;
+}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
-let mayLoad = 1 in
+let mayLoad = 1 in {
def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
+def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+ "#RESTORE_CRBIT", []>;
+}
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
- def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
- let isCodeGenOnly = 1 in
- def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>;
+ }
}
}
@@ -921,10 +974,10 @@ let Defs = [LR] in
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[(br bb:$dst)]>;
def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
- "ba $dst", BrB, []>;
+ "ba $dst", IIC_BrB, []>;
}
// BCC represents an arbitrary conditional branch on a predicate.
@@ -938,23 +991,39 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
let isReturn = 1, Uses = [LR, RM] in
- def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
- "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>;
+ def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>;
+ }
+
+ let isCodeGenOnly = 1 in {
+ let Pattern = [(brcond i1:$bi, bb:$dst)] in
+ def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 12, $bi, $dst">;
+
+ let Pattern = [(brcond (not i1:$bi), bb:$dst)] in
+ def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 4, $bi, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
+ "bclr 12, $bi, 0", IIC_BrB, []>;
+ def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
+ "bclr 4, $bi, 0", IIC_BrB, []>;
}
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
- "bdzlr", BrB, []>;
+ "bdzlr", IIC_BrB, []>;
def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
- "bdnzlr", BrB, []>;
+ "bdnzlr", IIC_BrB, []>;
def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
- "bdzlr+", BrB, []>;
+ "bdzlr+", IIC_BrB, []>;
def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
- "bdnzlr+", BrB, []>;
+ "bdnzlr+", IIC_BrB, []>;
def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
- "bdzlr-", BrB, []>;
+ "bdzlr-", IIC_BrB, []>;
def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
- "bdnzlr-", BrB, []>;
+ "bdnzlr-", IIC_BrB, []>;
}
let Defs = [CTR], Uses = [CTR] in {
@@ -997,33 +1066,54 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
+ "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+
+ def BCL : BForm_4<16, 12, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 12, $bi, $dst">;
+ def BCLn : BForm_4<16, 4, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 4, $bi, $dst">;
}
}
let Uses = [CTR, RM] in {
def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", BrB, [(PPCbctrl)]>,
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
Requires<[In32BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>;
+ }
}
let Uses = [LR, RM] in {
def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
- "blrl", BrB, []>;
+ "blrl", IIC_BrB, []>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
- let isCodeGenOnly = 1 in
- def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
- "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>;
+ def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 12, $bi, 0", IIC_BrB, []>;
+ def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 4, $bi, 0", IIC_BrB, []>;
+ }
}
let Defs = [CTR], Uses = [CTR, RM] in {
def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
@@ -1053,17 +1143,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
- "bdzlrl", BrB, []>;
+ "bdzlrl", IIC_BrB, []>;
def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
- "bdnzlrl", BrB, []>;
+ "bdnzlrl", IIC_BrB, []>;
def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
- "bdzlrl+", BrB, []>;
+ "bdzlrl+", IIC_BrB, []>;
def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
- "bdnzlrl+", BrB, []>;
+ "bdnzlrl+", IIC_BrB, []>;
def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
- "bdzlrl-", BrB, []>;
+ "bdzlrl-", IIC_BrB, []>;
def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
- "bdnzlrl-", BrB, []>;
+ "bdnzlrl-", IIC_BrB, []>;
}
}
@@ -1089,19 +1179,19 @@ let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
-def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In32BitMode]>;
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>, Requires<[In32BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", BrB,
+ "ba $dst", IIC_BrB,
[]>;
}
@@ -1127,33 +1217,33 @@ let isBranch = 1, isTerminator = 1 in {
// System call.
let PPC970_Unit = 7 in {
def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
- "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>;
+ "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
}
// DCB* instructions.
-def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
- "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
+ IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
- "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
- "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
- "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
- "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
- "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
- "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
- "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
@@ -1241,26 +1331,26 @@ let usesCustomInserter = 1 in {
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", LdStLWARX,
+ "lwarx $rD, $src", IIC_LdStLWARX,
[(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", LdStSTWCX,
+ "stwcx. $rS, $dst", IIC_LdStSTWCX,
[(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
-def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
- "twi $to, $rA, $imm", IntTrapW, []>;
+ "twi $to, $rA, $imm", IIC_IntTrapW, []>;
def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
- "tw $to, $rA, $rB", IntTrapW, []>;
+ "tw $to, $rA, $rB", IIC_IntTrapW, []>;
def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
- "tdi $to, $rA, $imm", IntTrapD, []>;
+ "tdi $to, $rA, $imm", IIC_IntTrapD, []>;
def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
- "td $to, $rA, $rB", IntTrapD, []>;
+ "td $to, $rA, $rB", IIC_IntTrapD, []>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -1269,56 +1359,56 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStLoad,
+ "lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
- "lha $rD, $src", LdStLHA,
+ "lha $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStLoad,
+ "lhz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStLoad,
+ "lwz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
- "lfs $rD, $src", LdStLFD,
+ "lfs $rD, $src", IIC_LdStLFD,
[(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
- "lfd $rD, $src", LdStLFD,
+ "lfd $rD, $src", IIC_LdStLFD,
[(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1, neverHasSideEffects = 1 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoadUpd,
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStLHAU,
+ "lhau $rD, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoadUpd,
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoadUpd,
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfsu $rD, $addr", LdStLFDU,
+ "lfsu $rD, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfdu $rD, $addr", LdStLFDU,
+ "lfdu $rD, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -1326,37 +1416,37 @@ def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoadUpd,
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLHAU,
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoadUpd,
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoadUpd,
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lfsux $rD, $addr", LdStLFDU,
+ "lfsux $rD, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lfdux $rD, $addr", LdStLFDU,
+ "lfdux $rD, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -1366,45 +1456,45 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStLoad,
+ "lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
- "lhax $rD, $src", LdStLHA,
+ "lhax $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStLoad,
+ "lhzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStLoad,
+ "lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", LdStLoad,
+ "lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", LdStLoad,
+ "lwbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
- "lfsx $frD, $src", LdStLFD,
+ "lfsx $frD, $src", IIC_LdStLFD,
[(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
- "lfdx $frD, $src", LdStLFD,
+ "lfdx $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwax $frD, $src", LdStLFD,
+ "lfiwax $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwzx $frD, $src", LdStLFD,
+ "lfiwzx $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
// Load Multiple
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
- "lmw $rD, $src", LdStLMW, []>;
+ "lmw $rD, $src", IIC_LdStLMW, []>;
//===----------------------------------------------------------------------===//
// PPC32 Store Instructions.
@@ -1413,38 +1503,38 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", LdStStore,
+ "stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", LdStStore,
+ "sth $rS, $src", IIC_LdStStore,
[(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", LdStStore,
+ "stw $rS, $src", IIC_LdStStore,
[(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
- "stfs $rS, $dst", LdStSTFD,
+ "stfs $rS, $dst", IIC_LdStSTFD,
[(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
- "stfd $rS, $dst", LdStSTFD,
+ "stfd $rS, $dst", IIC_LdStSTFD,
[(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
- "stfsu $rS, $dst", LdStSTFDU, []>,
+ "stfsu $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
- "stfdu $rS, $dst", LdStSTFDU, []>,
+ "stfdu $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
@@ -1465,59 +1555,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
// Indexed (r+r) Stores.
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStStore,
+ "stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStStore,
+ "sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStStore,
+ "stwx $rS, $dst", IIC_LdStStore,
[(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
- "sthbrx $rS, $dst", LdStStore,
+ "sthbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
- "stwbrx $rS, $dst", LdStStore,
+ "stwbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfiwx $frS, $dst", LdStSTFD,
+ "stfiwx $frS, $dst", IIC_LdStSTFD,
[(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
- "stfsx $frS, $dst", LdStSTFD,
+ "stfsx $frS, $dst", IIC_LdStSTFD,
[(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfdx $frS, $dst", LdStSTFD,
+ "stfdx $frS, $dst", IIC_LdStSTFD,
[(store f64:$frS, xaddr:$dst)]>;
}
// Indexed (r+r) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
- "stfsux $rS, $dst", LdStSTFDU, []>,
+ "stfsux $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
- "stfdux $rS, $dst", LdStSTFDU, []>,
+ "stfdux $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
}
@@ -1538,11 +1628,20 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
// Store Multiple
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
- "stmw $rS, $dst", LdStLMW, []>;
+ "stmw $rS, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", LdStSync, []>;
-def : Pat<(int_ppc_sync), (SYNC 0)>;
+ "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+
+let isCodeGenOnly = 1 in {
+ def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+ let L = 0;
+ }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -1550,41 +1649,41 @@ def : Pat<(int_ppc_sync), (SYNC 0)>;
let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntSimple,
+ "addi $rD, $rA, $imm", IIC_IntSimple,
[(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
let BaseName = "addic" in {
let Defs = [CARRY] in
def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic $rD, $rA, $imm", IntGeneral,
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
[(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic. $rD, $rA, $imm", IntGeneral,
+ "addic. $rD, $rA, $imm", IIC_IntGeneral,
[]>, isDOT, RecFormRel;
}
def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
- "addis $rD, $rA, $imm", IntSimple,
+ "addis $rD, $rA, $imm", IIC_IntSimple,
[(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
let isCodeGenOnly = 1 in
def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
- "la $rD, $sym($rA)", IntGeneral,
+ "la $rD, $sym($rA)", IIC_IntGeneral,
[(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "mulli $rD, $rA, $imm", IntMulLI,
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
[(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
let Defs = [CARRY] in
def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "subfic $rD, $rA, $imm", IntGeneral,
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
[(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
- "li $rD, $imm", IntSimple,
+ "li $rD, $imm", IIC_IntSimple,
[(set i32:$rD, imm32SExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
- "lis $rD, $imm", IntSimple,
+ "lis $rD, $imm", IIC_IntSimple,
[(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
@@ -1592,154 +1691,170 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andi. $dst, $src1, $src2", IntGeneral,
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andis. $dst, $src1, $src2", IntGeneral,
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
}
def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntSimple,
+ "ori $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntSimple,
+ "oris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntSimple,
+ "xori $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntSimple,
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
-def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
+
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
[]>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
+
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
- "cmpwi $crD, $rA, $imm", IntCompare>;
+ "cmpwi $crD, $rA, $imm", IIC_IntCompare>;
def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
- "cmplwi $dst, $src1, $src2", IntCompare>;
+ "cmplwi $dst, $src1, $src2", IIC_IntCompare>;
}
}
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+let isCommutable = 1 in {
defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nand", "$rA, $rS, $rB", IntSimple,
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "and", "$rA, $rS, $rB", IntSimple,
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+} // isCommutable
defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "andc", "$rA, $rS, $rB", IntSimple,
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "or", "$rA, $rS, $rB", IntSimple,
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (or i32:$rS, i32:$rB))]>;
defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nor", "$rA, $rS, $rB", IntSimple,
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+} // isCommutable
defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "orc", "$rA, $rS, $rB", IntSimple,
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "eqv", "$rA, $rS, $rB", IntSimple,
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "xor", "$rA, $rS, $rB", IntSimple,
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+} // isCommutable
defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "slw", "$rA, $rS, $rB", IntGeneral,
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral,
[(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "srw", "$rA, $rS, $rB", IntGeneral,
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral,
[(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "sraw", "$rA, $rS, $rB", IntShift,
+ "sraw", "$rA, $rS, $rB", IIC_IntShift,
[(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
let PPC970_Unit = 1 in { // FXU Operations.
let neverHasSideEffects = 1 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
- "srawi", "$rA, $rS, $SH", IntShift,
+ "srawi", "$rA, $rS, $SH", IIC_IntShift,
[(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
- "cntlzw", "$rA, $rS", IntGeneral,
+ "cntlzw", "$rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctlz i32:$rS))]>;
defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
- "extsb", "$rA, $rS", IntSimple,
+ "extsb", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
- "extsh", "$rA, $rS", IntSimple,
+ "extsh", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
}
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmpw $crD, $rA, $rB", IntCompare>;
+ "cmpw $crD, $rA, $rB", IIC_IntCompare>;
def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmplw $crD, $rA, $rB", IntCompare>;
+ "cmplw $crD, $rA, $rB", IIC_IntCompare>;
}
}
let PPC970_Unit = 3 in { // FPU Operations.
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
-// "fcmpo $crD, $fA, $fB", FPCompare>;
+// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
let isCompare = 1, neverHasSideEffects = 1 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiw", "$frD, $frB", FPGeneral,
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwz", "$frD, $frB", FPGeneral,
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
- "frsp", "$frD, $frB", FPGeneral,
+ "frsp", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
- "frin", "$frD, $frB", FPGeneral,
+ "frin", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
- "frin", "$frD, $frB", FPGeneral,
+ "frin", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
- "frip", "$frD, $frB", FPGeneral,
+ "frip", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fceil f64:$frB))]>;
defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
- "frip", "$frD, $frB", FPGeneral,
+ "frip", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fceil f32:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
- "friz", "$frD, $frB", FPGeneral,
+ "friz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (ftrunc f64:$frB))]>;
defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
- "friz", "$frD, $frB", FPGeneral,
+ "friz", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (ftrunc f32:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
- "frim", "$frD, $frB", FPGeneral,
+ "frim", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (ffloor f64:$frB))]>;
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
- "frim", "$frD, $frB", FPGeneral,
+ "frim", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (ffloor f32:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
- "fsqrt", "$frD, $frB", FPSqrt,
+ "fsqrt", "$frD, $frB", IIC_FPSqrtD,
[(set f64:$frD, (fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
- "fsqrts", "$frD, $frB", FPSqrt,
+ "fsqrts", "$frD, $frB", IIC_FPSqrtS,
[(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1751,54 +1866,54 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
let neverHasSideEffects = 1 in
defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
- "fmr", "$frD, $frB", FPGeneral,
+ "fmr", "$frD, $frB", IIC_FPGeneral,
[]>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
- "fabs", "$frD, $frB", FPGeneral,
+ "fabs", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fabs f32:$frB))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
- "fabs", "$frD, $frB", FPGeneral,
+ "fabs", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fabs f64:$frB))]>;
defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
- "fnabs", "$frD, $frB", FPGeneral,
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fneg (fabs f32:$frB)))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
- "fnabs", "$frD, $frB", FPGeneral,
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fneg (fabs f64:$frB)))]>;
defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
- "fneg", "$frD, $frB", FPGeneral,
+ "fneg", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fneg f32:$frB))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
- "fneg", "$frD, $frB", FPGeneral,
+ "fneg", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
[(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
[(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
- "fre", "$frD, $frB", FPGeneral,
+ "fre", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfre f64:$frB))]>;
defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
- "fres", "$frD, $frB", FPGeneral,
+ "fres", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfre f32:$frB))]>;
defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
- "frsqrte", "$frD, $frB", FPGeneral,
+ "frsqrte", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
- "frsqrtes", "$frD, $frB", FPGeneral,
+ "frsqrtes", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
@@ -1806,57 +1921,67 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
//
let neverHasSideEffects = 1 in
def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
- "mcrf $BF, $BFA", BrMCR>,
+ "mcrf $BF, $BFA", IIC_BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+let isCommutable = 1 in {
def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crand $CRD, $CRA, $CRB", BrCR, []>;
+ "crand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>;
def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crnand $CRD, $CRA, $CRB", BrCR, []>;
+ "crnand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>;
def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "cror $CRD, $CRA, $CRB", BrCR, []>;
+ "cror $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>;
def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crxor $CRD, $CRA, $CRB", BrCR, []>;
+ "crxor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>;
def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crnor $CRD, $CRA, $CRB", BrCR, []>;
+ "crnor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>;
def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "creqv $CRD, $CRA, $CRB", BrCR, []>;
+ "creqv $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>;
+} // isCommutable
def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crandc $CRD, $CRA, $CRB", BrCR, []>;
+ "crandc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>;
def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crorc $CRD, $CRA, $CRB", BrCR, []>;
+ "crorc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
- "creqv $dst, $dst, $dst", BrCR,
- []>;
+ "creqv $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 1)]>;
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
- "crxor $dst, $dst, $dst", BrCR,
- []>;
+ "crxor $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 0)]>;
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
- "creqv 6, 6, 6", BrCR,
+ "creqv 6, 6, 6", IIC_BrCR,
[(PPCcr6set)]>;
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
- "crxor 6, 6, 6", BrCR,
+ "crxor 6, 6, 6", IIC_BrCR,
[(PPCcr6unset)]>;
}
}
@@ -1865,38 +1990,38 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
//
def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
- "mfspr $RT, $SPR", SprMFSPR>;
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
- "mtspr $SPR, $RT", SprMTSPR>;
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
+ "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>;
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
- "mfctr $rT", SprMFSPR>,
+ "mfctr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
let Pattern = [(int_ppc_mtctr i32:$rS)] in
def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
- "mtlr $rS", SprMTSPR>,
+ "mtlr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR] in {
def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
- "mflr $rT", SprMFSPR>,
+ "mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -1905,19 +2030,19 @@ let isCodeGenOnly = 1 in {
// like a GPR on the PPC970. As such, copies in and out have the same
// performance characteristics as an OR instruction.
def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
- "mtspr 256, $rS", IntGeneral>,
+ "mtspr 256, $rS", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
- "mfspr $rT, 256", IntGeneral>,
+ "mfspr $rT, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
(outs VRSAVERC:$reg), (ins gprc:$rS),
- "mtspr 256, $rS", IntGeneral>,
+ "mtspr 256, $rS", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
(ins VRSAVERC:$reg),
- "mfspr $rT, 256", IntGeneral>,
+ "mfspr $rT, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -1935,20 +2060,20 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
let neverHasSideEffects = 1 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
- "mtocrf $FXM, $ST", BrMCRX>,
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
- "mtcrf $FXM, $rS", BrMCRX>,
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
- "mfcr $rT", SprMFCR>,
+ "mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // neverHasSideEffects = 1
@@ -1962,18 +2087,18 @@ let usesCustomInserter = 1, Uses = [RM] in {
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0, []>,
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0, []>,
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
- "mtfsf $FM, $rT", IntMTFSB0, []>,
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs $rT", IntMFFS,
+ "mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1981,59 +2106,68 @@ let Uses = [RM] in {
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
-//
+let isCommutable = 1 in
defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "add", "$rT, $rA, $rB", IntSimple,
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+let isCodeGenOnly = 1 in
+def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "addc", "$rT, $rA, $rB", IntGeneral,
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
+
defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IntDivW,
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
[(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IntDivW,
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
[(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhw", "$rT, $rA, $rB", IntMulHW,
+ "mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhwu", "$rT, $rA, $rB", IntMulHWU,
+ "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mullw", "$rT, $rA, $rB", IntMulHW,
+ "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+} // isCommutable
defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subf", "$rT, $rA, $rB", IntGeneral,
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfc", "$rT, $rA, $rB", IntGeneral,
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
- "neg", "$rT, $rA", IntSimple,
+ "neg", "$rT, $rA", IIC_IntSimple,
[(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY] in {
+let isCommutable = 1 in
defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "adde", "$rT, $rA, $rB", IntGeneral,
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addme", "$rT, $rA", IntGeneral,
+ "addme", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, -1))]>;
defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addze", "$rT, $rA", IntGeneral,
+ "addze", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, 0))]>;
defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfe", "$rT, $rA, $rB", IntGeneral,
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfme", "$rT, $rA", IntGeneral,
+ "subfme", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (sube -1, i32:$rA))]>;
defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfze", "$rT, $rA", IntGeneral,
+ "subfze", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -2043,90 +2177,96 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
//
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
let Uses = [RM] in {
+let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
(fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
(fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
(fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
(fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
+} // isCommutable
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
// should use an FMRSD if the input comparison value really wants to be a float)
// and 4/8 byte forms for the result and operand type..
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FSELD : AForm_1r<63, 23,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FSELS : AForm_1r<63, 23,
(outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
+ let isCommutable = 1 in {
defm FADD : AForm_2r<63, 21,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fadd", "$FRT, $FRA, $FRB", FPAddSub,
+ "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
[(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
defm FADDS : AForm_2r<59, 21,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fadds", "$FRT, $FRA, $FRB", FPGeneral,
+ "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ } // isCommutable
defm FDIV : AForm_2r<63, 18,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fdiv", "$FRT, $FRA, $FRB", FPDivD,
+ "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
[(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
defm FDIVS : AForm_2r<59, 18,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fdivs", "$FRT, $FRA, $FRB", FPDivS,
+ "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
[(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ let isCommutable = 1 in {
defm FMUL : AForm_3r<63, 25,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
- "fmul", "$FRT, $FRA, $FRC", FPFused,
+ "fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
[(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
defm FMULS : AForm_3r<59, 25,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
- "fmuls", "$FRT, $FRA, $FRC", FPGeneral,
+ "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
[(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ } // isCommutable
defm FSUB : AForm_2r<63, 20,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fsub", "$FRT, $FRA, $FRB", FPAddSub,
+ "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
[(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
defm FSUBS : AForm_2r<59, 20,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fsubs", "$FRT, $FRA, $FRB", FPGeneral,
+ "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
@@ -2136,7 +2276,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
(outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
[]>;
}
@@ -2147,24 +2287,24 @@ let isCommutable = 1 in {
// RLWIMI can be commuted if the rotate amount is zero.
defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
(ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
- u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
- []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
- NoEncode<"$rSi">;
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
}
let BaseName = "rlwinm" in {
def RLWINM : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
def RLWINMo : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
}
defm RLWNM : MForm_2r<23, (outs gprc:$rA),
(ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
- "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
[]>;
}
} // neverHasSideEffects = 1
@@ -2178,8 +2318,10 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not i32:$in),
- (NOR $in, $in)>;
+def i32not : OutPatFrag<(ops node:$in),
+ (NOR $in, $in)>;
+def : Pat<(not i32:$in),
+ (i32not $in)>;
// ADD an arbitrary immediate.
def : Pat<(add i32:$in, imm:$imm),
@@ -2250,6 +2392,17 @@ def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
+// Support for thread-local storage.
+def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+ [(set i32:$rD, (PPCppc32GOT))]>;
+
+def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
+ (ADD4TLS $in, tglobaltlsaddr:$g)>;
+
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
@@ -2284,7 +2437,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2304,52 +2458,561 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
+
+def crnot : OutPatFrag<(ops node:$in),
+ (CRNOR $in, $in)>;
+def : Pat<(not i1:$in),
+ (crnot $in)>;
+
+// Patterns for arithmetic i1 operations.
+def : Pat<(add i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(sub i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(mul i1:$a, i1:$b),
+ (CRAND $a, $b)>;
+
+// We're sometimes asked to materialize i1 -1, which is just 1 in this case
+// (-1 is used to mean all bits set).
+def : Pat<(i1 -1), (CRSET)>;
+
+// i1 extensions, implemented in terms of isel.
+def : Pat<(i32 (zext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i32 (sext i1:$in)),
+ (SELECT_I4 $in, (LI -1), (LI 0))>;
+
+def : Pat<(i64 (zext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+def : Pat<(i64 (sext i1:$in)),
+ (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
+
+// FIXME: We should choose either a zext or a sext based on other constants
+// already around.
+def : Pat<(i32 (anyext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i64 (anyext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+
+// match setcc on i1 variables.
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
+ (CREQV $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
+ (CRXOR $s1, $s2)>;
+
+// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
+// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for
+// floating-point types.
+
+multiclass CRNotPat<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i32 (zext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+ def : Pat<(i32 (sext pattern)),
+ (SELECT_I4 result, (LI 0), (LI -1))>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i64 (zext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+ def : Pat<(i64 (sext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 -1))>;
+
+ // FIXME: We should choose either a zext or a sext based on other constants
+ // already around.
+ def : Pat<(i32 (anyext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+
+ def : Pat<(i64 (anyext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+}
+
+// FIXME: Because of what seems like a bug in TableGen's type-inference code,
+// we need to write imm:$imm in the output patterns below, not just $imm, or
+// else the resulting matcher will not correctly add the immediate operand
+// (making it a register operand instead).
+
+// extended SETCC.
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag,
+ OutPatFrag rfrag, OutPatFrag rfrag8> {
+ def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+
+ def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+}
+
+// Note that we do all inversions below with i(32|64)not, instead of using
+// (xori x, 1) because on the A2 nor has single-cycle latency while xori
+// has 2-cycle latency.
+
+defm : ExtSetCCPat<SETEQ,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (CNTLZW $in), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (CNTLZD $in), 58, 63)> >;
+
+defm : ExtSetCCPat<SETNE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+// SETCC for i32.
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpw cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmplwi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// SETCC for i64.
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpd cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmpldi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// SETCC for f32.
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// SETCC for f64.
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// match select on i1 variables:
+def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
+ (CROR (CRAND $cond , $tval),
+ (CRAND (crnot $cond), $fval))>;
+
+// match selectcc on i1 variables:
+// select (lhs == rhs), tval, fval is:
+// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+ (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+ (CRAND (CRORC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+ (CROR (CRAND (CRORC $rhs, $lhs), $tval),
+ (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $tval),
+ (CRAND (CRXOR $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+ (CROR (CRAND (CRORC $lhs, $rhs), $tval),
+ (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+ (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+ (CRAND (CRORC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $fval),
+ (CRAND (CRXOR $lhs, $rhs), $tval))>;
+
+// match selectcc on i1 variables with non-i1 output.
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+ (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+ (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
+ (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+ (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+ (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
+ (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+ (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+ (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
+ (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+ (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+ (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
+ (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+ (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+ (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
+ (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+ (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+ (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
+ (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+let usesCustomInserter = 1 in {
+def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_EQ_BIT",
+ [(set i1:$dst, (trunc (not i32:$in)))]>;
+def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_GT_BIT",
+ [(set i1:$dst, (trunc i32:$in))]>;
+
+def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_EQ_BIT8",
+ [(set i1:$dst, (trunc (not i64:$in)))]>;
+def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_GT_BIT8",
+ [(set i1:$dst, (trunc i64:$in))]>;
+}
+
+def : Pat<(i1 (not (trunc i32:$in))),
+ (ANDIo_1_EQ_BIT $in)>;
+def : Pat<(i1 (not (trunc i64:$in))),
+ (ANDIo_1_EQ_BIT8 $in)>;
//===----------------------------------------------------------------------===//
// PowerPC Instructions used for assembler/disassembler only
//
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
- "isync", SprISYNC, []>;
+ "isync", IIC_SprISYNC, []>;
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
- "icbi $src", LdStICBI, []>;
+ "icbi $src", IIC_LdStICBI, []>;
def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
- "eieio", LdStLoad, []>;
+ "eieio", IIC_LdStLoad, []>;
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
- "wait $L", LdStLoad, []>;
+ "wait $L", IIC_LdStLoad, []>;
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
- "mtmsr $RS, $L", SprMTMSR>;
+ "mtmsr $RS, $L", IIC_SprMTMSR>;
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
- "mfmsr $RT", SprMFMSR, []>;
+ "mfmsr $RT", IIC_SprMFMSR, []>;
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
- "mtmsrd $RS, $L", SprMTMSRD>;
+ "mtmsrd $RS, $L", IIC_SprMTMSRD>;
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
- "slbie $RB", SprSLBIE, []>;
+ "slbie $RB", IIC_SprSLBIE, []>;
def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
- "slbmte $RS, $RB", SprSLBMTE, []>;
+ "slbmte $RS, $RB", IIC_SprSLBMTE, []>;
def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
- "slbmfee $RT, $RB", SprSLBMFEE, []>;
+ "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
-def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
- "tlbsync", SprTLBSYNC, []>;
+ "tlbsync", IIC_SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
- "tlbiel $RB", SprTLBIEL, []>;
+ "tlbiel $RB", IIC_SprTLBIEL, []>;
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
- "tlbie $RB,$RS", SprTLBIE, []>;
+ "tlbie $RB,$RS", IIC_SprTLBIE, []>;
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
@@ -2373,10 +3036,10 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>;
-def : InstAlias<"msync", (SYNC 0)>;
-def : InstAlias<"lwsync", (SYNC 1)>;
-def : InstAlias<"ptesync", (SYNC 2)>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
@@ -2565,19 +3228,19 @@ let PPC970_Unit = 7 in {
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCLR : XLForm_2<19, 16, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclr $bo, $bi, $bh", BrB, []>;
+ "bclr $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCLRL : XLForm_2<19, 16, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclrl $bo, $bi, $bh", BrB, []>;
+ "bclrl $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCCTR : XLForm_2<19, 528, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctr $bo, $bi, $bh", BrB, []>;
+ "bcctr $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCCTRL : XLForm_2<19, 528, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctrl $bo, $bi, $bh", BrB, []>;
+ "bcctrl $bo, $bi, $bh", IIC_BrB, []>;
}
def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
@@ -2620,14 +3283,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
(BCCA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lr"#pm#" $cc",
- (BCLR bibo, crrc:$cc)>;
+ (BCCLR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lr"#pm,
- (BCLR bibo, CR0)>;
+ (BCCLR bibo, CR0)>;
def : InstAlias<"b"#name#"ctr"#pm#" $cc",
- (BCCTR bibo, crrc:$cc)>;
+ (BCCCTR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctr"#pm,
- (BCCTR bibo, CR0)>;
+ (BCCCTR bibo, CR0)>;
def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
(BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
@@ -2640,14 +3303,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
(BCCLA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lrl"#pm#" $cc",
- (BCLRL bibo, crrc:$cc)>;
+ (BCCLRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lrl"#pm,
- (BCLRL bibo, CR0)>;
+ (BCCLRL bibo, CR0)>;
def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
- (BCCTRL bibo, crrc:$cc)>;
+ (BCCCTRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctrl"#pm,
- (BCCTRL bibo, CR0)>;
+ (BCCCTRL bibo, CR0)>;
}
multiclass BranchExtendedMnemonic<string name, int bibo> {
defm : BranchExtendedMnemonicPM<name, "", bibo>;
@@ -2671,18 +3334,18 @@ def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
-def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
-def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
-def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
-def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
multiclass TrapExtendedMnemonic<string name, int to> {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 0000000..9cc919e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,816 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT;
+ }
+}
+
+def HasVSX : Predicate<"PPCSubTarget.hasVSX()">;
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ def LXSDX : XForm_1<31, 588,
+ (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVD2X : XForm_1<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVDSX : XForm_1<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+
+ def LXVW4X : XForm_1<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ }
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ }
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDADP" in {
+ let isCommutable = 1 in
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSMSUBADP" in {
+ let isCommutable = 1 in
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDADP" in {
+ let isCommutable = 1 in
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBADP" in {
+ let isCommutable = 1 in
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDADP" in {
+ let isCommutable = 1 in
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDASP" in {
+ let isCommutable = 1 in
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBADP" in {
+ let isCommutable = 1 in
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBASP" in {
+ let isCommutable = 1 in
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDADP" in {
+ let isCommutable = 1 in
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDASP" in {
+ let isCommutable = 1 in
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBADP" in {
+ let isCommutable = 1 in
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBASP" in {
+ let isCommutable = 1 in
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsfrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ } // isCommutable
+} // Uses = [RM]
+
+ // Logical Instructions
+ let isCommutable = 1 in
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA,
+ (vnot_ppc v4i32:$XB)))]>;
+ let isCommutable = 1 in {
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA,
+ v4i32:$XB)))]>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XXLORf: XX3Form<60, 146,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
+ } // isCommutable
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // neverHasSideEffects
+} // AddedComplexity
+
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+// sign extension patterns
+// To extend "in place" from v2i32 to v2i64, we have input data like:
+// | undef | i32 | undef | i32 |
+// but xvcvsxwdp expects the input in big-Endian format:
+// | i32 | undef | i32 | undef |
+// so we need to shift everything to the left by one i32 (word) before
+// the conversion.
+def : Pat<(sext_inreg v2i64:$C, v2i32),
+ (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
+def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
+ (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+
+} // AddedComplexity
+} // HasVSX
+
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 5e3a48d..227919c 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -214,6 +214,10 @@ asm(
".text\n"
".align 2\n"
".globl PPC64CompilationCallback\n"
+#if _CALL_ELF == 2
+ ".type PPC64CompilationCallback,@function\n"
+"PPC64CompilationCallback:\n"
+#else
".section \".opd\",\"aw\",@progbits\n"
".align 3\n"
"PPC64CompilationCallback:\n"
@@ -223,6 +227,7 @@ asm(
".align 4\n"
".type PPC64CompilationCallback,@function\n"
".L.PPC64CompilationCallback:\n"
+#endif
# else
asm(
".text\n"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f61c8bf..029bb8a 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -19,11 +19,14 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -32,35 +35,40 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ const TargetMachine &TM = AP.TM;
+ Mangler *Mang = AP.Mang;
+ const DataLayout *DL = TM.getDataLayout();
MCContext &Ctx = AP.OutContext;
SmallString<128> Name;
+ StringRef Suffix;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB)
+ Suffix = "$stub";
+ else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ Suffix = "$non_lazy_ptr";
+
+ if (!Suffix.empty())
+ Name += DL->getPrivateGlobalPrefix();
+
+ unsigned PrefixLen = Name.size();
+
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
- Name += AP.MAI->getGlobalPrefix();
- Name += MO.getSymbolName();
- } else {
+ Mang->getNameWithPrefix(Name, MO.getSymbolName());
+ } else {
const GlobalValue *GV = MO.getGlobal();
- bool isImplicitlyPrivate = false;
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
- (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
- isImplicitlyPrivate = true;
-
- AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ TM.getNameWithPrefix(Name, GV, *Mang);
}
-
+
+ unsigned OrigLen = Name.size() - PrefixLen;
+
+ Name += Suffix;
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
+
// If the target flags on the operand changes the name of the symbol, do that
// before we return the symbol.
if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
- Name += "$stub";
- const char *PGP = AP.MAI->getPrivateGlobalPrefix();
- const char *Prefix = "";
- if (!Name.startswith(PGP)) {
- // http://llvm.org/bugs/show_bug.cgi?id=15763
- // all stubs and lazy_ptrs should be local symbols, which need leading 'L'
- Prefix = PGP;
- }
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name));
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI(AP).getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -72,10 +80,9 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
- Name.erase(Name.end()-5, Name.end());
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
}
return Sym;
}
@@ -83,9 +90,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
// If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
// then add the suffix.
if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
- Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
-
MachineModuleInfoMachO &MachO = getMachOMMI(AP);
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -101,7 +105,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
return Sym;
}
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Sym;
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 19ccbfc..4ff282e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -230,12 +229,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
+ case PPC::VFRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
}
+const TargetRegisterClass*
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+ if (Subtarget.hasVSX()) {
+ // With VSX, we can inflate various sub-register classes to the full VSX
+ // register set.
+
+ if (RC == &PPC::F8RCRegClass)
+ return &PPC::VSFRCRegClass;
+ else if (RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -452,6 +472,127 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
+void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
+ getCRFromCRBit(SrcReg))
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(getCRFromCRBit(SrcReg));
+
+ // If the saved register wasn't CR0LT, shift the bits left so that the bit to
+ // store is the first one. Mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CRBIT does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
+
+ unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
+ .addReg(getCRFromCRBit(DestReg));
+
+ unsigned ShiftBits = getEncodingValue(DestReg);
+ // rlwimi r11, r10, 32-ShiftBits, ..., ...
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
+ .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32-ShiftBits : 0)
+ .addImm(ShiftBits).addImm(ShiftBits);
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
+ getCRFromCRBit(DestReg))
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -595,6 +736,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_CR) {
lowerCRRestore(II, FrameIndex);
return;
+ } else if (OpC == PPC::SPILL_CRBIT) {
+ lowerCRBitSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CRBIT) {
+ lowerCRBitRestore(II, FrameIndex);
+ return;
} else if (OpC == PPC::SPILL_VRSAVE) {
lowerVRSAVESpilling(II, FrameIndex);
return;
@@ -812,11 +959,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
}
-void
-PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
- unsigned BaseReg, int64_t Offset) const {
- MachineInstr &MI = *I;
-
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI.getOperand(FIOperandNum).isFI()) {
++FIOperandNum;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index dd3bb40..c3e54b4 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
-#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
+#include "llvm/ADT/DenseMap.h"
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -40,6 +40,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
@@ -69,6 +72,10 @@ public:
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
+ void lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerVRSAVERestore(MachineBasicBlock::iterator II,
@@ -85,8 +92,8 @@ public:
void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const;
- void resolveFrameIndex(MachineBasicBlock::iterator I,
- unsigned BaseReg, int64_t Offset) const;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const;
bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
// Debug information queries.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index d566e2c..e11f7d4 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
}
@@ -47,9 +49,36 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// VR - One of the 32 128-bit vector registers
-class VR<bits<5> num, string n> : PPCReg<n> {
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
+ let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 0;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
}
// CR - One of the 8 4-bit condition registers
@@ -80,12 +109,27 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+ def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
// Vector registers
foreach Index = 0-31 in {
- def V#Index : VR<Index, "v"#Index>,
+ def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// VSX registers
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
// The reprsentation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">;
@@ -204,17 +248,39 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
-def CRBITRC : RegisterClass<"PPC", [i32], 32,
- (add CR0LT, CR0GT, CR0EQ, CR0UN,
- CR1LT, CR1GT, CR1EQ, CR1UN,
- CR2LT, CR2GT, CR2EQ, CR2UN,
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSLRC, VSHRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+ (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+ VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+ VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+ VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+ VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
+def CRBITRC : RegisterClass<"PPC", [i1], 32,
+ (add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
CR4LT, CR4GT, CR4EQ, CR4UN,
CR5LT, CR5GT, CR5EQ, CR5UN,
CR6LT, CR6GT, CR6EQ, CR6UN,
- CR7LT, CR7GT, CR7EQ, CR7UN)>
-{
- let CopyCost = -1;
+ CR7LT, CR7GT, CR7EQ, CR7UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR0LT, CR0GT, CR0EQ, CR0UN)> {
+ let Size = 32;
}
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 92ba69c..1221d41 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -8,114 +8,106 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Functional units across PowerPC chips sets
-//
-def BPU : FuncUnit; // Branch unit
-def SLU : FuncUnit; // Store/load unit
-def SRU : FuncUnit; // special register unit
-def IU1 : FuncUnit; // integer unit 1 (simple)
-def IU2 : FuncUnit; // integer unit 2 (complex)
-def FPU1 : FuncUnit; // floating point unit 1
-def FPU2 : FuncUnit; // floating point unit 2
-def VPU : FuncUnit; // vector permutation unit
-def VIU1 : FuncUnit; // vector integer unit 1 (simple)
-def VIU2 : FuncUnit; // vector integer unit 2 (complex)
-def VFPU : FuncUnit; // vector floating point unit
-
-//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//
-def IntSimple : InstrItinClass;
-def IntGeneral : InstrItinClass;
-def IntCompare : InstrItinClass;
-def IntDivD : InstrItinClass;
-def IntDivW : InstrItinClass;
-def IntMFFS : InstrItinClass;
-def IntMFVSCR : InstrItinClass;
-def IntMTFSB0 : InstrItinClass;
-def IntMTSRD : InstrItinClass;
-def IntMulHD : InstrItinClass;
-def IntMulHW : InstrItinClass;
-def IntMulHWU : InstrItinClass;
-def IntMulLI : InstrItinClass;
-def IntRFID : InstrItinClass;
-def IntRotateD : InstrItinClass;
-def IntRotateDI : InstrItinClass;
-def IntRotate : InstrItinClass;
-def IntShift : InstrItinClass;
-def IntTrapD : InstrItinClass;
-def IntTrapW : InstrItinClass;
-def BrB : InstrItinClass;
-def BrCR : InstrItinClass;
-def BrMCR : InstrItinClass;
-def BrMCRX : InstrItinClass;
-def LdStDCBA : InstrItinClass;
-def LdStDCBF : InstrItinClass;
-def LdStDCBI : InstrItinClass;
-def LdStLoad : InstrItinClass;
-def LdStLoadUpd : InstrItinClass;
-def LdStStore : InstrItinClass;
-def LdStStoreUpd : InstrItinClass;
-def LdStDSS : InstrItinClass;
-def LdStICBI : InstrItinClass;
-def LdStLD : InstrItinClass;
-def LdStLDU : InstrItinClass;
-def LdStLDARX : InstrItinClass;
-def LdStLFD : InstrItinClass;
-def LdStLFDU : InstrItinClass;
-def LdStLHA : InstrItinClass;
-def LdStLHAU : InstrItinClass;
-def LdStLMW : InstrItinClass;
-def LdStLVecX : InstrItinClass;
-def LdStLWA : InstrItinClass;
-def LdStLWARX : InstrItinClass;
-def LdStSLBIA : InstrItinClass;
-def LdStSLBIE : InstrItinClass;
-def LdStSTD : InstrItinClass;
-def LdStSTDCX : InstrItinClass;
-def LdStSTDU : InstrItinClass;
-def LdStSTFD : InstrItinClass;
-def LdStSTFDU : InstrItinClass;
-def LdStSTVEBX : InstrItinClass;
-def LdStSTWCX : InstrItinClass;
-def LdStSync : InstrItinClass;
-def SprISYNC : InstrItinClass;
-def SprMFSR : InstrItinClass;
-def SprMTMSR : InstrItinClass;
-def SprMTSR : InstrItinClass;
-def SprTLBSYNC : InstrItinClass;
-def SprMFCR : InstrItinClass;
-def SprMFMSR : InstrItinClass;
-def SprMFSPR : InstrItinClass;
-def SprMFTB : InstrItinClass;
-def SprMTSPR : InstrItinClass;
-def SprMTSRIN : InstrItinClass;
-def SprRFI : InstrItinClass;
-def SprSC : InstrItinClass;
-def FPGeneral : InstrItinClass;
-def FPAddSub : InstrItinClass;
-def FPCompare : InstrItinClass;
-def FPDivD : InstrItinClass;
-def FPDivS : InstrItinClass;
-def FPFused : InstrItinClass;
-def FPRes : InstrItinClass;
-def FPSqrt : InstrItinClass;
-def VecGeneral : InstrItinClass;
-def VecFP : InstrItinClass;
-def VecFPCompare : InstrItinClass;
-def VecComplex : InstrItinClass;
-def VecPerm : InstrItinClass;
-def VecFPRound : InstrItinClass;
-def VecVSL : InstrItinClass;
-def VecVSR : InstrItinClass;
-def SprMTMSRD : InstrItinClass;
-def SprSLIE : InstrItinClass;
-def SprSLBIE : InstrItinClass;
-def SprSLBMTE : InstrItinClass;
-def SprSLBMFEE : InstrItinClass;
-def SprSLBIA : InstrItinClass;
-def SprTLBIEL : InstrItinClass;
-def SprTLBIE : InstrItinClass;
+def IIC_IntSimple : InstrItinClass;
+def IIC_IntGeneral : InstrItinClass;
+def IIC_IntCompare : InstrItinClass;
+def IIC_IntDivD : InstrItinClass;
+def IIC_IntDivW : InstrItinClass;
+def IIC_IntMFFS : InstrItinClass;
+def IIC_IntMFVSCR : InstrItinClass;
+def IIC_IntMTFSB0 : InstrItinClass;
+def IIC_IntMTSRD : InstrItinClass;
+def IIC_IntMulHD : InstrItinClass;
+def IIC_IntMulHW : InstrItinClass;
+def IIC_IntMulHWU : InstrItinClass;
+def IIC_IntMulLI : InstrItinClass;
+def IIC_IntRFID : InstrItinClass;
+def IIC_IntRotateD : InstrItinClass;
+def IIC_IntRotateDI : InstrItinClass;
+def IIC_IntRotate : InstrItinClass;
+def IIC_IntShift : InstrItinClass;
+def IIC_IntTrapD : InstrItinClass;
+def IIC_IntTrapW : InstrItinClass;
+def IIC_BrB : InstrItinClass;
+def IIC_BrCR : InstrItinClass;
+def IIC_BrMCR : InstrItinClass;
+def IIC_BrMCRX : InstrItinClass;
+def IIC_LdStDCBA : InstrItinClass;
+def IIC_LdStDCBF : InstrItinClass;
+def IIC_LdStDCBI : InstrItinClass;
+def IIC_LdStLoad : InstrItinClass;
+def IIC_LdStLoadUpd : InstrItinClass;
+def IIC_LdStLoadUpdX : InstrItinClass;
+def IIC_LdStStore : InstrItinClass;
+def IIC_LdStStoreUpd : InstrItinClass;
+def IIC_LdStDSS : InstrItinClass;
+def IIC_LdStICBI : InstrItinClass;
+def IIC_LdStLD : InstrItinClass;
+def IIC_LdStLDU : InstrItinClass;
+def IIC_LdStLDUX : InstrItinClass;
+def IIC_LdStLDARX : InstrItinClass;
+def IIC_LdStLFD : InstrItinClass;
+def IIC_LdStLFDU : InstrItinClass;
+def IIC_LdStLFDUX : InstrItinClass;
+def IIC_LdStLHA : InstrItinClass;
+def IIC_LdStLHAU : InstrItinClass;
+def IIC_LdStLHAUX : InstrItinClass;
+def IIC_LdStLMW : InstrItinClass;
+def IIC_LdStLVecX : InstrItinClass;
+def IIC_LdStLWA : InstrItinClass;
+def IIC_LdStLWARX : InstrItinClass;
+def IIC_LdStSLBIA : InstrItinClass;
+def IIC_LdStSLBIE : InstrItinClass;
+def IIC_LdStSTD : InstrItinClass;
+def IIC_LdStSTDCX : InstrItinClass;
+def IIC_LdStSTDU : InstrItinClass;
+def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTFD : InstrItinClass;
+def IIC_LdStSTFDU : InstrItinClass;
+def IIC_LdStSTVEBX : InstrItinClass;
+def IIC_LdStSTWCX : InstrItinClass;
+def IIC_LdStSync : InstrItinClass;
+def IIC_SprISYNC : InstrItinClass;
+def IIC_SprMFSR : InstrItinClass;
+def IIC_SprMTMSR : InstrItinClass;
+def IIC_SprMTSR : InstrItinClass;
+def IIC_SprTLBSYNC : InstrItinClass;
+def IIC_SprMFCR : InstrItinClass;
+def IIC_SprMFCRF : InstrItinClass;
+def IIC_SprMFMSR : InstrItinClass;
+def IIC_SprMFSPR : InstrItinClass;
+def IIC_SprMFTB : InstrItinClass;
+def IIC_SprMTSPR : InstrItinClass;
+def IIC_SprMTSRIN : InstrItinClass;
+def IIC_SprRFI : InstrItinClass;
+def IIC_SprSC : InstrItinClass;
+def IIC_FPGeneral : InstrItinClass;
+def IIC_FPAddSub : InstrItinClass;
+def IIC_FPCompare : InstrItinClass;
+def IIC_FPDivD : InstrItinClass;
+def IIC_FPDivS : InstrItinClass;
+def IIC_FPFused : InstrItinClass;
+def IIC_FPRes : InstrItinClass;
+def IIC_FPSqrtD : InstrItinClass;
+def IIC_FPSqrtS : InstrItinClass;
+def IIC_VecGeneral : InstrItinClass;
+def IIC_VecFP : InstrItinClass;
+def IIC_VecFPCompare : InstrItinClass;
+def IIC_VecComplex : InstrItinClass;
+def IIC_VecPerm : InstrItinClass;
+def IIC_VecFPRound : InstrItinClass;
+def IIC_VecVSL : InstrItinClass;
+def IIC_VecVSR : InstrItinClass;
+def IIC_SprMTMSRD : InstrItinClass;
+def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBIE : InstrItinClass;
+def IIC_SprSLBMTE : InstrItinClass;
+def IIC_SprSLBMFEE : InstrItinClass;
+def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIEL : InstrItinClass;
+def IIC_SprTLBIE : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
@@ -125,6 +117,7 @@ include "PPCSchedule440.td"
include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
+include "PPCScheduleP7.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
@@ -136,392 +129,392 @@ include "PPCScheduleE5500.td"
//
// opcode itinerary class
// ====== ===============
-// add IntSimple
-// addc IntGeneral
-// adde IntGeneral
-// addi IntSimple
-// addic IntGeneral
-// addic. IntGeneral
-// addis IntSimple
-// addme IntGeneral
-// addze IntGeneral
-// and IntSimple
-// andc IntSimple
-// andi. IntGeneral
-// andis. IntGeneral
-// b BrB
-// bc BrB
-// bcctr BrB
-// bclr BrB
-// cmp IntCompare
-// cmpi IntCompare
-// cmpl IntCompare
-// cmpli IntCompare
-// cntlzd IntRotateD
-// cntlzw IntGeneral
-// crand BrCR
-// crandc BrCR
-// creqv BrCR
-// crnand BrCR
-// crnor BrCR
-// cror BrCR
-// crorc BrCR
-// crxor BrCR
-// dcba LdStDCBA
-// dcbf LdStDCBF
-// dcbi LdStDCBI
-// dcbst LdStDCBF
-// dcbt LdStLoad
-// dcbtst LdStLoad
-// dcbz LdStDCBF
-// divd IntDivD
-// divdu IntDivD
-// divw IntDivW
-// divwu IntDivW
-// dss LdStDSS
-// dst LdStDSS
-// dstst LdStDSS
-// eciwx LdStLoad
-// ecowx LdStLoad
-// eieio LdStLoad
-// eqv IntSimple
-// extsb IntSimple
-// extsh IntSimple
-// extsw IntSimple
-// fabs FPGeneral
-// fadd FPAddSub
-// fadds FPGeneral
-// fcfid FPGeneral
-// fcmpo FPCompare
-// fcmpu FPCompare
-// fctid FPGeneral
-// fctidz FPGeneral
-// fctiw FPGeneral
-// fctiwz FPGeneral
-// fdiv FPDivD
-// fdivs FPDivS
-// fmadd FPFused
-// fmadds FPGeneral
-// fmr FPGeneral
-// fmsub FPFused
-// fmsubs FPGeneral
-// fmul FPFused
-// fmuls FPGeneral
-// fnabs FPGeneral
-// fneg FPGeneral
-// fnmadd FPFused
-// fnmadds FPGeneral
-// fnmsub FPFused
-// fnmsubs FPGeneral
-// fres FPRes
-// frsp FPGeneral
-// frsqrte FPGeneral
-// fsel FPGeneral
-// fsqrt FPSqrt
-// fsqrts FPSqrt
-// fsub FPAddSub
-// fsubs FPGeneral
-// icbi LdStICBI
-// isync SprISYNC
-// lbz LdStLoad
-// lbzu LdStLoadUpd
-// lbzux LdStLoadUpd
-// lbzx LdStLoad
-// ld LdStLD
-// ldarx LdStLDARX
-// ldu LdStLDU
-// ldux LdStLDU
-// ldx LdStLD
-// lfd LdStLFD
-// lfdu LdStLFDU
-// lfdux LdStLFDU
-// lfdx LdStLFD
-// lfs LdStLFD
-// lfsu LdStLFDU
-// lfsux LdStLFDU
-// lfsx LdStLFD
-// lha LdStLHA
-// lhau LdStLHAU
-// lhaux LdStLHAU
-// lhax LdStLHA
-// lhbrx LdStLoad
-// lhz LdStLoad
-// lhzu LdStLoadUpd
-// lhzux LdStLoadUpd
-// lhzx LdStLoad
-// lmw LdStLMW
-// lswi LdStLMW
-// lswx LdStLMW
-// lvebx LdStLVecX
-// lvehx LdStLVecX
-// lvewx LdStLVecX
-// lvsl LdStLVecX
-// lvsr LdStLVecX
-// lvx LdStLVecX
-// lvxl LdStLVecX
-// lwa LdStLWA
-// lwarx LdStLWARX
-// lwaux LdStLHAU
-// lwax LdStLHA
-// lwbrx LdStLoad
-// lwz LdStLoad
-// lwzu LdStLoadUpd
-// lwzux LdStLoadUpd
-// lwzx LdStLoad
-// mcrf BrMCR
-// mcrfs FPGeneral
-// mcrxr BrMCRX
-// mfcr SprMFCR
-// mffs IntMFFS
-// mfmsr SprMFMSR
-// mfspr SprMFSPR
-// mfsr SprMFSR
-// mfsrin SprMFSR
-// mftb SprMFTB
-// mfvscr IntMFVSCR
-// mtcrf BrMCRX
-// mtfsb0 IntMTFSB0
-// mtfsb1 IntMTFSB0
-// mtfsf IntMTFSB0
-// mtfsfi IntMTFSB0
-// mtmsr SprMTMSR
-// mtmsrd LdStLD
-// mtspr SprMTSPR
-// mtsr SprMTSR
-// mtsrd IntMTSRD
-// mtsrdin IntMTSRD
-// mtsrin SprMTSRIN
-// mtvscr IntMFVSCR
-// mulhd IntMulHD
-// mulhdu IntMulHD
-// mulhw IntMulHW
-// mulhwu IntMulHWU
-// mulld IntMulHD
-// mulli IntMulLI
-// mullw IntMulHW
-// nand IntSimple
-// neg IntSimple
-// nor IntSimple
-// or IntSimple
-// orc IntSimple
-// ori IntSimple
-// oris IntSimple
-// rfi SprRFI
-// rfid IntRFID
-// rldcl IntRotateD
-// rldcr IntRotateD
-// rldic IntRotateDI
-// rldicl IntRotateDI
-// rldicr IntRotateDI
-// rldimi IntRotateDI
-// rlwimi IntRotate
-// rlwinm IntGeneral
-// rlwnm IntGeneral
-// sc SprSC
-// slbia LdStSLBIA
-// slbie LdStSLBIE
-// sld IntRotateD
-// slw IntGeneral
-// srad IntRotateD
-// sradi IntRotateDI
-// sraw IntShift
-// srawi IntShift
-// srd IntRotateD
-// srw IntGeneral
-// stb LdStStore
-// stbu LdStStoreUpd
-// stbux LdStStoreUpd
-// stbx LdStStore
-// std LdStSTD
-// stdcx. LdStSTDCX
-// stdu LdStSTDU
-// stdux LdStSTDU
-// stdx LdStSTD
-// stfd LdStSTFD
-// stfdu LdStSTFDU
-// stfdux LdStSTFDU
-// stfdx LdStSTFD
-// stfiwx LdStSTFD
-// stfs LdStSTFD
-// stfsu LdStSTFDU
-// stfsux LdStSTFDU
-// stfsx LdStSTFD
-// sth LdStStore
-// sthbrx LdStStore
-// sthu LdStStoreUpd
-// sthux LdStStoreUpd
-// sthx LdStStore
-// stmw LdStLMW
-// stswi LdStLMW
-// stswx LdStLMW
-// stvebx LdStSTVEBX
-// stvehx LdStSTVEBX
-// stvewx LdStSTVEBX
-// stvx LdStSTVEBX
-// stvxl LdStSTVEBX
-// stw LdStStore
-// stwbrx LdStStore
-// stwcx. LdStSTWCX
-// stwu LdStStoreUpd
-// stwux LdStStoreUpd
-// stwx LdStStore
-// subf IntGeneral
-// subfc IntGeneral
-// subfe IntGeneral
-// subfic IntGeneral
-// subfme IntGeneral
-// subfze IntGeneral
-// sync LdStSync
-// td IntTrapD
-// tdi IntTrapD
-// tlbia LdStSLBIA
-// tlbie LdStDCBF
-// tlbsync SprTLBSYNC
-// tw IntTrapW
-// twi IntTrapW
-// vaddcuw VecGeneral
-// vaddfp VecFP
-// vaddsbs VecGeneral
-// vaddshs VecGeneral
-// vaddsws VecGeneral
-// vaddubm VecGeneral
-// vaddubs VecGeneral
-// vadduhm VecGeneral
-// vadduhs VecGeneral
-// vadduwm VecGeneral
-// vadduws VecGeneral
-// vand VecGeneral
-// vandc VecGeneral
-// vavgsb VecGeneral
-// vavgsh VecGeneral
-// vavgsw VecGeneral
-// vavgub VecGeneral
-// vavguh VecGeneral
-// vavguw VecGeneral
-// vcfsx VecFP
-// vcfux VecFP
-// vcmpbfp VecFPCompare
-// vcmpeqfp VecFPCompare
-// vcmpequb VecGeneral
-// vcmpequh VecGeneral
-// vcmpequw VecGeneral
-// vcmpgefp VecFPCompare
-// vcmpgtfp VecFPCompare
-// vcmpgtsb VecGeneral
-// vcmpgtsh VecGeneral
-// vcmpgtsw VecGeneral
-// vcmpgtub VecGeneral
-// vcmpgtuh VecGeneral
-// vcmpgtuw VecGeneral
-// vctsxs VecFP
-// vctuxs VecFP
-// vexptefp VecFP
-// vlogefp VecFP
-// vmaddfp VecFP
-// vmaxfp VecFPCompare
-// vmaxsb VecGeneral
-// vmaxsh VecGeneral
-// vmaxsw VecGeneral
-// vmaxub VecGeneral
-// vmaxuh VecGeneral
-// vmaxuw VecGeneral
-// vmhaddshs VecComplex
-// vmhraddshs VecComplex
-// vminfp VecFPCompare
-// vminsb VecGeneral
-// vminsh VecGeneral
-// vminsw VecGeneral
-// vminub VecGeneral
-// vminuh VecGeneral
-// vminuw VecGeneral
-// vmladduhm VecComplex
-// vmrghb VecPerm
-// vmrghh VecPerm
-// vmrghw VecPerm
-// vmrglb VecPerm
-// vmrglh VecPerm
-// vmrglw VecPerm
-// vmsubfp VecFP
-// vmsummbm VecComplex
-// vmsumshm VecComplex
-// vmsumshs VecComplex
-// vmsumubm VecComplex
-// vmsumuhm VecComplex
-// vmsumuhs VecComplex
-// vmulesb VecComplex
-// vmulesh VecComplex
-// vmuleub VecComplex
-// vmuleuh VecComplex
-// vmulosb VecComplex
-// vmulosh VecComplex
-// vmuloub VecComplex
-// vmulouh VecComplex
-// vnor VecGeneral
-// vor VecGeneral
-// vperm VecPerm
-// vpkpx VecPerm
-// vpkshss VecPerm
-// vpkshus VecPerm
-// vpkswss VecPerm
-// vpkswus VecPerm
-// vpkuhum VecPerm
-// vpkuhus VecPerm
-// vpkuwum VecPerm
-// vpkuwus VecPerm
-// vrefp VecFPRound
-// vrfim VecFPRound
-// vrfin VecFPRound
-// vrfip VecFPRound
-// vrfiz VecFPRound
-// vrlb VecGeneral
-// vrlh VecGeneral
-// vrlw VecGeneral
-// vrsqrtefp VecFP
-// vsel VecGeneral
-// vsl VecVSL
-// vslb VecGeneral
-// vsldoi VecPerm
-// vslh VecGeneral
-// vslo VecPerm
-// vslw VecGeneral
-// vspltb VecPerm
-// vsplth VecPerm
-// vspltisb VecPerm
-// vspltish VecPerm
-// vspltisw VecPerm
-// vspltw VecPerm
-// vsr VecVSR
-// vsrab VecGeneral
-// vsrah VecGeneral
-// vsraw VecGeneral
-// vsrb VecGeneral
-// vsrh VecGeneral
-// vsro VecPerm
-// vsrw VecGeneral
-// vsubcuw VecGeneral
-// vsubfp VecFP
-// vsubsbs VecGeneral
-// vsubshs VecGeneral
-// vsubsws VecGeneral
-// vsububm VecGeneral
-// vsububs VecGeneral
-// vsubuhm VecGeneral
-// vsubuhs VecGeneral
-// vsubuwm VecGeneral
-// vsubuws VecGeneral
-// vsum2sws VecComplex
-// vsum4sbs VecComplex
-// vsum4shs VecComplex
-// vsum4ubs VecComplex
-// vsumsws VecComplex
-// vupkhpx VecPerm
-// vupkhsb VecPerm
-// vupkhsh VecPerm
-// vupklpx VecPerm
-// vupklsb VecPerm
-// vupklsh VecPerm
-// vxor VecGeneral
-// xor IntSimple
-// xori IntSimple
-// xoris IntSimple
+// add IIC_IntSimple
+// addc IIC_IntGeneral
+// adde IIC_IntGeneral
+// addi IIC_IntSimple
+// addic IIC_IntGeneral
+// addic. IIC_IntGeneral
+// addis IIC_IntSimple
+// addme IIC_IntGeneral
+// addze IIC_IntGeneral
+// and IIC_IntSimple
+// andc IIC_IntSimple
+// andi. IIC_IntGeneral
+// andis. IIC_IntGeneral
+// b IIC_BrB
+// bc IIC_BrB
+// bcctr IIC_BrB
+// bclr IIC_BrB
+// cmp IIC_IntCompare
+// cmpi IIC_IntCompare
+// cmpl IIC_IntCompare
+// cmpli IIC_IntCompare
+// cntlzd IIC_IntRotateD
+// cntlzw IIC_IntGeneral
+// crand IIC_BrCR
+// crandc IIC_BrCR
+// creqv IIC_BrCR
+// crnand IIC_BrCR
+// crnor IIC_BrCR
+// cror IIC_BrCR
+// crorc IIC_BrCR
+// crxor IIC_BrCR
+// dcba IIC_LdStDCBA
+// dcbf IIC_LdStDCBF
+// dcbi IIC_LdStDCBI
+// dcbst IIC_LdStDCBF
+// dcbt IIC_LdStLoad
+// dcbtst IIC_LdStLoad
+// dcbz IIC_LdStDCBF
+// divd IIC_IntDivD
+// divdu IIC_IntDivD
+// divw IIC_IntDivW
+// divwu IIC_IntDivW
+// dss IIC_LdStDSS
+// dst IIC_LdStDSS
+// dstst IIC_LdStDSS
+// eciwx IIC_LdStLoad
+// ecowx IIC_LdStLoad
+// eieio IIC_LdStLoad
+// eqv IIC_IntSimple
+// extsb IIC_IntSimple
+// extsh IIC_IntSimple
+// extsw IIC_IntSimple
+// fabs IIC_FPGeneral
+// fadd IIC_FPAddSub
+// fadds IIC_FPGeneral
+// fcfid IIC_FPGeneral
+// fcmpo IIC_FPCompare
+// fcmpu IIC_FPCompare
+// fctid IIC_FPGeneral
+// fctidz IIC_FPGeneral
+// fctiw IIC_FPGeneral
+// fctiwz IIC_FPGeneral
+// fdiv IIC_FPDivD
+// fdivs IIC_FPDivS
+// fmadd IIC_FPFused
+// fmadds IIC_FPGeneral
+// fmr IIC_FPGeneral
+// fmsub IIC_FPFused
+// fmsubs IIC_FPGeneral
+// fmul IIC_FPFused
+// fmuls IIC_FPGeneral
+// fnabs IIC_FPGeneral
+// fneg IIC_FPGeneral
+// fnmadd IIC_FPFused
+// fnmadds IIC_FPGeneral
+// fnmsub IIC_FPFused
+// fnmsubs IIC_FPGeneral
+// fres IIC_FPRes
+// frsp IIC_FPGeneral
+// frsqrte IIC_FPGeneral
+// fsel IIC_FPGeneral
+// fsqrt IIC_FPSqrtD
+// fsqrts IIC_FPSqrtS
+// fsub IIC_FPAddSub
+// fsubs IIC_FPGeneral
+// icbi IIC_LdStICBI
+// isync IIC_SprISYNC
+// lbz IIC_LdStLoad
+// lbzu IIC_LdStLoadUpd
+// lbzux IIC_LdStLoadUpdX
+// lbzx IIC_LdStLoad
+// ld IIC_LdStLD
+// ldarx IIC_LdStLDARX
+// ldu IIC_LdStLDU
+// ldux IIC_LdStLDUX
+// ldx IIC_LdStLD
+// lfd IIC_LdStLFD
+// lfdu IIC_LdStLFDU
+// lfdux IIC_LdStLFDUX
+// lfdx IIC_LdStLFD
+// lfs IIC_LdStLFD
+// lfsu IIC_LdStLFDU
+// lfsux IIC_LdStLFDUX
+// lfsx IIC_LdStLFD
+// lha IIC_LdStLHA
+// lhau IIC_LdStLHAU
+// lhaux IIC_LdStLHAUX
+// lhax IIC_LdStLHA
+// lhbrx IIC_LdStLoad
+// lhz IIC_LdStLoad
+// lhzu IIC_LdStLoadUpd
+// lhzux IIC_LdStLoadUpdX
+// lhzx IIC_LdStLoad
+// lmw IIC_LdStLMW
+// lswi IIC_LdStLMW
+// lswx IIC_LdStLMW
+// lvebx IIC_LdStLVecX
+// lvehx IIC_LdStLVecX
+// lvewx IIC_LdStLVecX
+// lvsl IIC_LdStLVecX
+// lvsr IIC_LdStLVecX
+// lvx IIC_LdStLVecX
+// lvxl IIC_LdStLVecX
+// lwa IIC_LdStLWA
+// lwarx IIC_LdStLWARX
+// lwaux IIC_LdStLHAUX
+// lwax IIC_LdStLHA
+// lwbrx IIC_LdStLoad
+// lwz IIC_LdStLoad
+// lwzu IIC_LdStLoadUpd
+// lwzux IIC_LdStLoadUpdX
+// lwzx IIC_LdStLoad
+// mcrf IIC_BrMCR
+// mcrfs IIC_FPGeneral
+// mcrxr IIC_BrMCRX
+// mfcr IIC_SprMFCR
+// mffs IIC_IntMFFS
+// mfmsr IIC_SprMFMSR
+// mfspr IIC_SprMFSPR
+// mfsr IIC_SprMFSR
+// mfsrin IIC_SprMFSR
+// mftb IIC_SprMFTB
+// mfvscr IIC_IntMFVSCR
+// mtcrf IIC_BrMCRX
+// mtfsb0 IIC_IntMTFSB0
+// mtfsb1 IIC_IntMTFSB0
+// mtfsf IIC_IntMTFSB0
+// mtfsfi IIC_IntMTFSB0
+// mtmsr IIC_SprMTMSR
+// mtmsrd IIC_LdStLD
+// mtspr IIC_SprMTSPR
+// mtsr IIC_SprMTSR
+// mtsrd IIC_IntMTSRD
+// mtsrdin IIC_IntMTSRD
+// mtsrin IIC_SprMTSRIN
+// mtvscr IIC_IntMFVSCR
+// mulhd IIC_IntMulHD
+// mulhdu IIC_IntMulHD
+// mulhw IIC_IntMulHW
+// mulhwu IIC_IntMulHWU
+// mulld IIC_IntMulHD
+// mulli IIC_IntMulLI
+// mullw IIC_IntMulHW
+// nand IIC_IntSimple
+// neg IIC_IntSimple
+// nor IIC_IntSimple
+// or IIC_IntSimple
+// orc IIC_IntSimple
+// ori IIC_IntSimple
+// oris IIC_IntSimple
+// rfi IIC_SprRFI
+// rfid IIC_IntRFID
+// rldcl IIC_IntRotateD
+// rldcr IIC_IntRotateD
+// rldic IIC_IntRotateDI
+// rldicl IIC_IntRotateDI
+// rldicr IIC_IntRotateDI
+// rldimi IIC_IntRotateDI
+// rlwimi IIC_IntRotate
+// rlwinm IIC_IntGeneral
+// rlwnm IIC_IntGeneral
+// sc IIC_SprSC
+// slbia IIC_LdStSLBIA
+// slbie IIC_LdStSLBIE
+// sld IIC_IntRotateD
+// slw IIC_IntGeneral
+// srad IIC_IntRotateD
+// sradi IIC_IntRotateDI
+// sraw IIC_IntShift
+// srawi IIC_IntShift
+// srd IIC_IntRotateD
+// srw IIC_IntGeneral
+// stb IIC_LdStStore
+// stbu IIC_LdStStoreUpd
+// stbux IIC_LdStStoreUpd
+// stbx IIC_LdStStore
+// std IIC_LdStSTD
+// stdcx. IIC_LdStSTDCX
+// stdu IIC_LdStSTDU
+// stdux IIC_LdStSTDUX
+// stdx IIC_LdStSTD
+// stfd IIC_LdStSTFD
+// stfdu IIC_LdStSTFDU
+// stfdux IIC_LdStSTFDU
+// stfdx IIC_LdStSTFD
+// stfiwx IIC_LdStSTFD
+// stfs IIC_LdStSTFD
+// stfsu IIC_LdStSTFDU
+// stfsux IIC_LdStSTFDU
+// stfsx IIC_LdStSTFD
+// sth IIC_LdStStore
+// sthbrx IIC_LdStStore
+// sthu IIC_LdStStoreUpd
+// sthux IIC_LdStStoreUpd
+// sthx IIC_LdStStore
+// stmw IIC_LdStLMW
+// stswi IIC_LdStLMW
+// stswx IIC_LdStLMW
+// stvebx IIC_LdStSTVEBX
+// stvehx IIC_LdStSTVEBX
+// stvewx IIC_LdStSTVEBX
+// stvx IIC_LdStSTVEBX
+// stvxl IIC_LdStSTVEBX
+// stw IIC_LdStStore
+// stwbrx IIC_LdStStore
+// stwcx. IIC_LdStSTWCX
+// stwu IIC_LdStStoreUpd
+// stwux IIC_LdStStoreUpd
+// stwx IIC_LdStStore
+// subf IIC_IntGeneral
+// subfc IIC_IntGeneral
+// subfe IIC_IntGeneral
+// subfic IIC_IntGeneral
+// subfme IIC_IntGeneral
+// subfze IIC_IntGeneral
+// sync IIC_LdStSync
+// td IIC_IntTrapD
+// tdi IIC_IntTrapD
+// tlbia IIC_LdStSLBIA
+// tlbie IIC_LdStDCBF
+// tlbsync IIC_SprTLBSYNC
+// tw IIC_IntTrapW
+// twi IIC_IntTrapW
+// vaddcuw IIC_VecGeneral
+// vaddfp IIC_VecFP
+// vaddsbs IIC_VecGeneral
+// vaddshs IIC_VecGeneral
+// vaddsws IIC_VecGeneral
+// vaddubm IIC_VecGeneral
+// vaddubs IIC_VecGeneral
+// vadduhm IIC_VecGeneral
+// vadduhs IIC_VecGeneral
+// vadduwm IIC_VecGeneral
+// vadduws IIC_VecGeneral
+// vand IIC_VecGeneral
+// vandc IIC_VecGeneral
+// vavgsb IIC_VecGeneral
+// vavgsh IIC_VecGeneral
+// vavgsw IIC_VecGeneral
+// vavgub IIC_VecGeneral
+// vavguh IIC_VecGeneral
+// vavguw IIC_VecGeneral
+// vcfsx IIC_VecFP
+// vcfux IIC_VecFP
+// vcmpbfp IIC_VecFPCompare
+// vcmpeqfp IIC_VecFPCompare
+// vcmpequb IIC_VecGeneral
+// vcmpequh IIC_VecGeneral
+// vcmpequw IIC_VecGeneral
+// vcmpgefp IIC_VecFPCompare
+// vcmpgtfp IIC_VecFPCompare
+// vcmpgtsb IIC_VecGeneral
+// vcmpgtsh IIC_VecGeneral
+// vcmpgtsw IIC_VecGeneral
+// vcmpgtub IIC_VecGeneral
+// vcmpgtuh IIC_VecGeneral
+// vcmpgtuw IIC_VecGeneral
+// vctsxs IIC_VecFP
+// vctuxs IIC_VecFP
+// vexptefp IIC_VecFP
+// vlogefp IIC_VecFP
+// vmaddfp IIC_VecFP
+// vmaxfp IIC_VecFPCompare
+// vmaxsb IIC_VecGeneral
+// vmaxsh IIC_VecGeneral
+// vmaxsw IIC_VecGeneral
+// vmaxub IIC_VecGeneral
+// vmaxuh IIC_VecGeneral
+// vmaxuw IIC_VecGeneral
+// vmhaddshs IIC_VecComplex
+// vmhraddshs IIC_VecComplex
+// vminfp IIC_VecFPCompare
+// vminsb IIC_VecGeneral
+// vminsh IIC_VecGeneral
+// vminsw IIC_VecGeneral
+// vminub IIC_VecGeneral
+// vminuh IIC_VecGeneral
+// vminuw IIC_VecGeneral
+// vmladduhm IIC_VecComplex
+// vmrghb IIC_VecPerm
+// vmrghh IIC_VecPerm
+// vmrghw IIC_VecPerm
+// vmrglb IIC_VecPerm
+// vmrglh IIC_VecPerm
+// vmrglw IIC_VecPerm
+// vmsubfp IIC_VecFP
+// vmsummbm IIC_VecComplex
+// vmsumshm IIC_VecComplex
+// vmsumshs IIC_VecComplex
+// vmsumubm IIC_VecComplex
+// vmsumuhm IIC_VecComplex
+// vmsumuhs IIC_VecComplex
+// vmulesb IIC_VecComplex
+// vmulesh IIC_VecComplex
+// vmuleub IIC_VecComplex
+// vmuleuh IIC_VecComplex
+// vmulosb IIC_VecComplex
+// vmulosh IIC_VecComplex
+// vmuloub IIC_VecComplex
+// vmulouh IIC_VecComplex
+// vnor IIC_VecGeneral
+// vor IIC_VecGeneral
+// vperm IIC_VecPerm
+// vpkpx IIC_VecPerm
+// vpkshss IIC_VecPerm
+// vpkshus IIC_VecPerm
+// vpkswss IIC_VecPerm
+// vpkswus IIC_VecPerm
+// vpkuhum IIC_VecPerm
+// vpkuhus IIC_VecPerm
+// vpkuwum IIC_VecPerm
+// vpkuwus IIC_VecPerm
+// vrefp IIC_VecFPRound
+// vrfim IIC_VecFPRound
+// vrfin IIC_VecFPRound
+// vrfip IIC_VecFPRound
+// vrfiz IIC_VecFPRound
+// vrlb IIC_VecGeneral
+// vrlh IIC_VecGeneral
+// vrlw IIC_VecGeneral
+// vrsqrtefp IIC_VecFP
+// vsel IIC_VecGeneral
+// vsl IIC_VecVSL
+// vslb IIC_VecGeneral
+// vsldoi IIC_VecPerm
+// vslh IIC_VecGeneral
+// vslo IIC_VecPerm
+// vslw IIC_VecGeneral
+// vspltb IIC_VecPerm
+// vsplth IIC_VecPerm
+// vspltisb IIC_VecPerm
+// vspltish IIC_VecPerm
+// vspltisw IIC_VecPerm
+// vspltw IIC_VecPerm
+// vsr IIC_VecVSR
+// vsrab IIC_VecGeneral
+// vsrah IIC_VecGeneral
+// vsraw IIC_VecGeneral
+// vsrb IIC_VecGeneral
+// vsrh IIC_VecGeneral
+// vsro IIC_VecPerm
+// vsrw IIC_VecGeneral
+// vsubcuw IIC_VecGeneral
+// vsubfp IIC_VecFP
+// vsubsbs IIC_VecGeneral
+// vsubshs IIC_VecGeneral
+// vsubsws IIC_VecGeneral
+// vsububm IIC_VecGeneral
+// vsububs IIC_VecGeneral
+// vsubuhm IIC_VecGeneral
+// vsubuhs IIC_VecGeneral
+// vsubuwm IIC_VecGeneral
+// vsubuws IIC_VecGeneral
+// vsum2sws IIC_VecComplex
+// vsum4sbs IIC_VecComplex
+// vsum4shs IIC_VecComplex
+// vsum4ubs IIC_VecComplex
+// vsumsws IIC_VecComplex
+// vupkhpx IIC_VecPerm
+// vupkhsb IIC_VecPerm
+// vupkhsh IIC_VecPerm
+// vupklpx IIC_VecPerm
+// vupklsb IIC_VecPerm
+// vupklsh IIC_VecPerm
+// vxor IIC_VecGeneral
+// xor IIC_IntSimple
+// xori IIC_IntSimple
+// xoris IIC_IntSimple
//
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 37b6eac..218fed2 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -26,43 +26,39 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC 440/450 chip sets
//
-def IFTH1 : FuncUnit; // Fetch unit 1
-def IFTH2 : FuncUnit; // Fetch unit 2
-def PDCD1 : FuncUnit; // Decode unit 1
-def PDCD2 : FuncUnit; // Decode unit 2
-def DISS1 : FuncUnit; // Issue unit 1
-def DISS2 : FuncUnit; // Issue unit 2
-def LRACC : FuncUnit; // Register access and dispatch for
- // the simple integer (J-pipe) and
- // load/store (L-pipe) pipelines
-def IRACC : FuncUnit; // Register access and dispatch for
- // the complex integer (I-pipe) pipeline
-def FRACC : FuncUnit; // Register access and dispatch for
- // the floating-point execution (F-pipe) pipeline
-def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
-def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
-def IWB : FuncUnit; // Write-back unit for the I pipeline
-def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
-def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
-def JWB : FuncUnit; // Write-back unit for the J pipeline
-def AGEN : FuncUnit; // Address generation for the L pipeline
-def CRD : FuncUnit; // D-cache access for the L pipeline
-def LWB : FuncUnit; // Write-back unit for the L pipeline
-def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
-def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
-def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
-def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
-def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
-def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
-def FWB : FuncUnit; // Write-back unit for the F pipeline
+def P440_DISS1 : FuncUnit; // Issue unit 1
+def P440_DISS2 : FuncUnit; // Issue unit 2
+def P440_LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def P440_IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def P440_FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def P440_IWB : FuncUnit; // Write-back unit for the I pipeline
+def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def P440_JWB : FuncUnit; // Write-back unit for the J pipeline
+def P440_AGEN : FuncUnit; // Address generation for the L pipeline
+def P440_CRD : FuncUnit; // D-cache access for the L pipeline
+def P440_LWB : FuncUnit; // Write-back unit for the L pipeline
+def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def P440_FWB : FuncUnit; // Write-back unit for the F pipeline
-def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
- // to make sure that no lwarx/stwcx.
- // instructions are issued while another
- // lwarx/stwcx. is in the L pipe.
+def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
-def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
-def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs.
// Notes:
// Instructions are held in the FRACC, LRACC and IRACC pipeline
@@ -104,560 +100,500 @@ def FPR_Bypass : Bypass; // The bypass for floating-point regs.
def PPC440Itineraries : ProcessorItineraries<
- [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
- IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
- FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
- [GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<33, [IWB]>],
- [40, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [9, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<3, [AGEN], 1>,
- InstrStage<2, [CRD], 1>,
- InstrStage<1, [LWB]>]>,
- InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC], 0>,
- InstrStage<1, [LRACC], 0>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [FEXE1], 0>,
- InstrStage<1, [AGEN], 0>,
- InstrStage<1, [JEXE1], 0>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [FEXE2], 0>,
- InstrStage<1, [CRD], 0>,
- InstrStage<1, [JEXE2], 0>,
- InstrStage<1, [IEXE2]>,
- InstrStage<6, [FEXE3], 0>,
- InstrStage<6, [LWB], 0>,
- InstrStage<6, [JWB], 0>,
- InstrStage<6, [IWB]>]>,
- InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [9, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<25, [FWB]>],
- [35, 4, 4],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<13, [FWB]>],
- [23, 4, 4],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4],
- [FPR_Bypass, FPR_Bypass]>
+ [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2,
+ P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD,
+ P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5,
+ P440_FEXE6, P440_FWB, P440_LWARX_Hold],
+ [P440_GPR_Bypass, P440_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<33, [P440_IWB]>],
+ [36, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<3, [P440_AGEN], 1>,
+ InstrStage<2, [P440_CRD], 1>,
+ InstrStage<1, [P440_LWB]>]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC], 0>,
+ InstrStage<1, [P440_LRACC], 0>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_FEXE1], 0>,
+ InstrStage<1, [P440_AGEN], 0>,
+ InstrStage<1, [P440_JEXE1], 0>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_FEXE2], 0>,
+ InstrStage<1, [P440_CRD], 0>,
+ InstrStage<1, [P440_JEXE2], 0>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<6, [P440_FEXE3], 0>,
+ InstrStage<6, [P440_LWB], 0>,
+ InstrStage<6, [P440_JWB], 0>,
+ InstrStage<6, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [5, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<25, [P440_FWB]>],
+ [31, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<13, [P440_FWB]>],
+ [19, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// PPC440 machine model for scheduling and other instruction cost heuristics.
+
+def PPC440Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 instructions are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPC440Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 1612cd2..1447696 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,8 +14,8 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC A2 chip sets
//
-def XU : FuncUnit; // XU pipeline
-def FU : FuncUnit; // FI pipeline
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FI pipeline
//
// This file defines the itinerary class data for the PPC A2 processor.
@@ -24,126 +24,140 @@ def FU : FuncUnit; // FI pipeline
def PPCA2Itineraries : ProcessorItineraries<
- [XU, FU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<IntGeneral , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntCompare , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntDivW , [InstrStage<1, [XU]>],
- [39, 1, 1]>,
- InstrItinData<IntDivD , [InstrStage<1, [XU]>],
- [71, 1, 1]>,
- InstrItinData<IntMulHW , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<IntMulLI , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<IntRotate , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntRotateD , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntShift , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntTrapW , [InstrStage<1, [XU]>],
- [2, 1]>,
- InstrItinData<IntTrapD , [InstrStage<1, [XU]>],
- [2, 1]>,
- InstrItinData<BrB , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<BrCR , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<BrMCR , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<BrMCRX , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStLoad , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
- [6, 8, 1, 1]>,
- InstrItinData<LdStLDU , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStStore , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStICBI, [InstrStage<1, [XU]>],
- [16, 1, 1]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStLFD , [InstrStage<1, [XU]>],
- [7, 1, 1]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [XU]>],
- [7, 9, 1, 1]>,
- InstrItinData<LdStLHA , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [XU]>],
- [6, 8, 1, 1]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSTD , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSync , [InstrStage<1, [XU]>],
- [6]>,
- InstrItinData<SprISYNC , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [XU]>],
- [16, 1]>,
- InstrItinData<SprMFCR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [XU]>],
- [4, 1]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprMFTB , [InstrStage<1, [XU]>],
- [4, 1]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprRFI , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<SprSC , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FU]>],
- [6, 1, 1]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FU]>],
- [6, 1, 1]>,
- InstrItinData<FPCompare , [InstrStage<1, [FU]>],
- [5, 1, 1]>,
- InstrItinData<FPDivD , [InstrStage<1, [FU]>],
- [72, 1, 1]>,
- InstrItinData<FPDivS , [InstrStage<1, [FU]>],
- [59, 1, 1]>,
- InstrItinData<FPSqrt , [InstrStage<1, [FU]>],
- [69, 1, 1]>,
- InstrItinData<FPFused , [InstrStage<1, [FU]>],
- [6, 1, 1, 1]>,
- InstrItinData<FPRes , [InstrStage<1, [FU]>],
- [6, 1]>
+ [A2_XU, A2_FU], [], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
+ [39, 0, 0]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [A2_XU]>],
+ [71, 0, 0]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_IntTrapD, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
+ [16, 0, 0]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>],
+ [7, 0, 0]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [A2_XU]>],
+ [6]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [A2_XU]>],
+ [16, 0]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [A2_XU]>],
+ [1, 0]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [A2_FU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [A2_FU]>],
+ [72, 0, 0]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [A2_FU]>],
+ [59, 0, 0]>,
+ InstrItinData<IIC_FPSqrtD, [InstrStage<1, [A2_FU]>],
+ [69, 0, 0]>,
+ InstrItinData<IIC_FPSqrtS, [InstrStage<1, [A2_FU]>],
+ [65, 0, 0]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0, 0]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [A2_FU]>],
+ [6, 0]>
]>;
// ===---------------------------------------------------------------------===//
// A2 machine model for scheduling and other instruction cost heuristics.
def PPCA2Model : SchedMachineModel {
- let IssueWidth = 1; // 2 micro-ops are dispatched per cycle.
+ let IssueWidth = 1; // 1 instruction is dispatched per cycle.
let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
let LoadLatency = 6; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index c189b9e..dab89e3 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -19,238 +19,285 @@
// * Decode & Dispatch
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-def DIS0 : FuncUnit; // Dispatch stage - insn 1
-def DIS1 : FuncUnit; // Dispatch stage - insn 2
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
// * Execute
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
// Some instructions can only execute in SFX0 but not SFX1.
// The CFX has a bypass path, allowing non-divide instructions to execute
// while a divide instruction is executed.
-def SFX0 : FuncUnit; // Simple unit 0
-def SFX1 : FuncUnit; // Simple unit 1
-def BU : FuncUnit; // Branch unit
-def CFX_DivBypass
- : FuncUnit; // CFX divide bypass path
-def CFX_0 : FuncUnit; // CFX pipeline
-def LSU_0 : FuncUnit; // LSU pipeline
-def FPU_0 : FuncUnit; // FPU pipeline
+def E500_SFX0 : FuncUnit; // Simple unit 0
+def E500_SFX1 : FuncUnit; // Simple unit 1
+def E500_BU : FuncUnit; // Branch unit
+def E500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E500_CFX_0 : FuncUnit; // CFX pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+def E500_FPU_0 : FuncUnit; // FPU pipeline
-def CR_Bypass : Bypass;
+def E500_GPR_Bypass : Bypass;
+def E500_FPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
def PPCE500mcItineraries : ProcessorItineraries<
- [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 1, 1], // Latency = 1 or 2
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<14, [CFX_DivBypass]>],
- [17, 1, 1], // Latency=4..35, Repeat= 4..35
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<8, [FPU_0]>],
- [11], // Latency = 8
- [FPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<8, [FPU_0]>],
- [11, 1, 1], // Latency = 8
- [NoBypass, NoBypass, NoBypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0]>],
- [5, 1], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1], // Latency = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1, 1], // Latency = 1
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1], // Latency = 1
- [CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1, 1], // Latency = 4
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1, 1], // Latency = 4
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1], // Latency = r+3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [6, 1, 1], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>]>,
- InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [5, 1], // Latency = 2, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0]>],
- [5, 1],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0], 0>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<5, [SFX0]>],
- [8, 1],
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1], // Latency = 4, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1], // Latency = 1, Repeat rate = 1
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1], // Latency = 4, Repeat rate = 4
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1], // Latency = 1, Repeat rate = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0]>],
- [4, 1],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [11, 1, 1], // Latency = 8, Repeat rate = 2
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [FPU_0]>],
- [13, 1, 1], // Latency = 10, Repeat rate = 4
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [11, 1, 1], // Latency = 8, Repeat rate = 2
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<68, [FPU_0]>],
- [71, 1, 1], // Latency = 68, Repeat rate = 68
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<38, [FPU_0]>],
- [41, 1, 1], // Latency = 38, Repeat rate = 38
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [FPU_0]>],
- [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<38, [FPU_0]>],
- [41, 1], // Latency = 38, Repeat rate = 38
- [FPR_Bypass, FPR_Bypass]>
+ [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass,
+ E500_CFX_0, E500_LSU_0, E500_FPU_0],
+ [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [E500_CR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0], 0>,
+ InstrStage<14, [E500_CFX_DivBypass]>],
+ [17, 1, 1], // Latency=4..35, Repeat= 4..35
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11], // Latency = 8
+ [E500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass,
+ E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<3, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1],
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0, E500_SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [5, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_CR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<68, [E500_FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass,
+ E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass, E500_FPR_Bypass]>
]>;
// ===---------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index 7a24d20..de097d9 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -20,280 +20,344 @@
// * Decode & Dispatch
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-// def DIS0 : FuncUnit;
-// def DIS1 : FuncUnit;
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
// * Execute
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
// The CFX has a bypass path, allowing non-divide instructions to execute
// while a divide instruction is being executed.
-// def SFX0 : FuncUnit; // Simple unit 0
-// def SFX1 : FuncUnit; // Simple unit 1
-// def BU : FuncUnit; // Branch unit
-// def CFX_DivBypass
-// : FuncUnit; // CFX divide bypass path
-// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+def E5500_SFX0 : FuncUnit; // Simple unit 0
+def E5500_SFX1 : FuncUnit; // Simple unit 1
+def E5500_BU : FuncUnit; // Branch unit
+def E5500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0
-def CFX_1 : FuncUnit; // CFX pipeline stage 1
+def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1
-// def LSU_0 : FuncUnit; // LSU pipeline
-// def FPU_0 : FuncUnit; // FPU pipeline
+def E5500_LSU_0 : FuncUnit; // LSU pipeline
+def E5500_FPU_0 : FuncUnit; // FPU pipeline
-// def CR_Bypass : Bypass;
+def E5500_GPR_Bypass : Bypass;
+def E5500_FPR_Bypass : Bypass;
+def E5500_CR_Bypass : Bypass;
def PPCE5500Itineraries : ProcessorItineraries<
- [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
- LSU_0, FPU_0],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 1 or 2
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<26, [CFX_DivBypass]>],
- [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<16, [CFX_DivBypass]>],
- [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11], // Latency = 7, Repeat rate = 1
- [FPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<7, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 7
- [NoBypass, NoBypass, NoBypass]>,
- InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<2, [CFX_1]>],
- [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<1, [CFX_1]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<1, [CFX_1]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<2, [CFX_1]>],
- [8, 2, 2], // Latency = 4 or 5, Repeat = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0]>],
- [6, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2], // Latency = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2, 2], // Latency = 1
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2], // Latency = 1
- [CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [5, 2, 2], // Latency = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [LSU_0]>],
- [8, 2], // Latency = r+3, Repeat rate = r+3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [CFX_0]>],
- [6, 2], // Latency = 2, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0], 0>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<5, [CFX_0]>],
- [9, 2], // Latency = 5, Repeat rate = 5
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [8, 2], // Latency = 4, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [5], // Latency = 1, Repeat rate = 1
- [GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [CFX_0]>],
- [8, 2], // Latency = 4, Repeat rate = 4
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5], // Latency = 1, Repeat rate = 1
- [GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<31, [FPU_0]>],
- [39, 2, 2], // Latency = 35, Repeat rate = 31
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<16, [FPU_0]>],
- [24, 2, 2], // Latency = 20, Repeat rate = 16
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [12, 2], // Latency = 8, Repeat rate = 2
- [FPR_Bypass, FPR_Bypass]>
+ [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU,
+ E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1,
+ E5500_LSU_0, E5500_FPU_0],
+ [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [E5500_CR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<26, [E5500_CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<16, [E5500_CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<7, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass,
+ E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_CR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<31, [E5500_FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<16, [E5500_FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass,
+ E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [E5500_FPR_Bypass, E5500_FPR_Bypass]>
]>;
// ===---------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 72a0a39..21efd8f 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -11,61 +11,70 @@
//
//===----------------------------------------------------------------------===//
+def G3_BPU : FuncUnit; // Branch unit
+def G3_SLU : FuncUnit; // Store/load unit
+def G3_SRU : FuncUnit; // special register unit
+def G3_IU1 : FuncUnit; // integer unit 1 (simple)
+def G3_IU2 : FuncUnit; // integer unit 2 (complex)
+def G3_FPU1 : FuncUnit; // floating point unit 1
def G3Itineraries : ProcessorItineraries<
- [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
- InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
- InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+ [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G3_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G3_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_LdStDCBA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]>
]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index fc9120d..340773e 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -11,71 +11,86 @@
//
//===----------------------------------------------------------------------===//
+def G4_BPU : FuncUnit; // Branch unit
+def G4_SLU : FuncUnit; // Store/load unit
+def G4_SRU : FuncUnit; // special register unit
+def G4_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4_FPU1 : FuncUnit; // floating point unit 1
+def G4_VPU : FuncUnit; // vector permutation unit
+def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4_VFPU : FuncUnit; // vector floating point unit
+
def G4Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
- InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
- InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+ [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1,
+ G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]>
]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index a4e82ce..1d9f13f 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -11,78 +11,102 @@
//
//===----------------------------------------------------------------------===//
-def IU3 : FuncUnit; // integer unit 3 (7450 simple)
-def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+def G4P_BPU : FuncUnit; // Branch unit
+def G4P_SLU : FuncUnit; // Store/load unit
+def G4P_SRU : FuncUnit; // special register unit
+def G4P_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4P_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4P_IU3 : FuncUnit; // integer unit 3 (simple)
+def G4P_IU4 : FuncUnit; // integer unit 4 (simple)
+def G4P_FPU1 : FuncUnit; // floating point unit 1
+def G4P_VPU : FuncUnit; // vector permutation unit
+def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4P_VFPU : FuncUnit; // vector floating point unit
def G4PlusItineraries : ProcessorItineraries<
- [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
- InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
- InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
- InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
- InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
- InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
- InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
- InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
- InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
- InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+ [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1,
+ G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]>
]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index c64998d..a3b73ab 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -11,90 +11,110 @@
//
//===----------------------------------------------------------------------===//
+def G5_BPU : FuncUnit; // Branch unit
+def G5_SLU : FuncUnit; // Store/load unit
+def G5_SRU : FuncUnit; // special register unit
+def G5_IU1 : FuncUnit; // integer unit 1 (simple)
+def G5_IU2 : FuncUnit; // integer unit 2 (complex)
+def G5_FPU1 : FuncUnit; // floating point unit 1
+def G5_FPU2 : FuncUnit; // floating point unit 2
+def G5_VPU : FuncUnit; // vector permutation unit
+def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G5_VFPU : FuncUnit; // vector floating point unit
+
def G5Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
- InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
- InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
- InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
- InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
- InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
- InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
- InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
- InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
- InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
- InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
- InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
- InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
- InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
- InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
- InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
- InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
- InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
- InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
- InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
- InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
- InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+ [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2,
+ G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G5_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<10, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<8, [G5_IU2]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<5, [G5_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<3, [G5_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<3, [G5_VPU]>]>
]>;
// ===---------------------------------------------------------------------===//
-// e5500 machine model for scheduling and other instruction cost heuristics.
+// G5 machine model for scheduling and other instruction cost heuristics.
def G5Model : SchedMachineModel {
let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
new file mode 100644
index 0000000..d3e4269
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -0,0 +1,385 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P7 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P7_DU1 : FuncUnit;
+def P7_DU2 : FuncUnit;
+def P7_DU3 : FuncUnit;
+def P7_DU4 : FuncUnit;
+def P7_DU5 : FuncUnit;
+def P7_DU6 : FuncUnit;
+
+def P7_LS1 : FuncUnit; // Load/Store pipeline 1
+def P7_LS2 : FuncUnit; // Load/Store pipeline 2
+
+def P7_FX1 : FuncUnit; // FX pipeline 1
+def P7_FX2 : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P7_VS1 : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P7_VS2 : FuncUnit; // VS pipeline 2
+
+def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P7_BRU : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts, AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycle).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op). prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instructon 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
+def P7Itineraries : ProcessorItineraries<
+ [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
+ P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2,
+ P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ // FIXME: Add record-form itinerary data.
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<36, [P7_FX1, P7_FX2]>],
+ [36, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<68, [P7_FX1, P7_FX2]>],
+ [68, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_CRU]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1]>, // mtcr
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P7 machine model for scheduling and other instruction cost heuristics.
+
+def P7Model : SchedMachineModel {
+ let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle.
+ // Note that the dispatch bundle size is 6 (including
+ // branches), but the total internal issue bandwidth per
+ // cycle (from all queues) is 8.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = P7Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 7231ab1..b07abe4 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -31,12 +31,24 @@
using namespace llvm;
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit)
+ const std::string &FS, bool is64Bit,
+ CodeGenOpt::Level OptLevel)
: PPCGenSubtargetInfo(TT, CPU, FS)
, IsPPC64(is64Bit)
, TargetTriple(TT) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+
+ std::string FullFS = FS;
+
+ // At -O2 and above, track CR bits as individual registers.
+ if (OptLevel >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+
+ resetSubtargetFeatures(CPU, FullFS);
}
/// SetJITMode - This is called to inform the subtarget info that we are
@@ -73,8 +85,10 @@ void PPCSubtarget::initializeEnvironment() {
HasMFOCRF = false;
Has64BitSupport = false;
Use64BitRegs = false;
+ UseCRBits = false;
HasAltivec = false;
HasQPX = false;
+ HasVSX = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -179,7 +193,7 @@ bool PPCSubtarget::enablePostRAScheduler(
return OptLevel >= CodeGenOpt::Default;
}
-// Embedded cores need aggressive scheduling.
+// Embedded cores need aggressive scheduling (and some others also benefit).
static bool needsAggressiveScheduling(unsigned Directive) {
switch (Directive) {
default: return false;
@@ -187,6 +201,7 @@ static bool needsAggressiveScheduling(unsigned Directive) {
case PPC::DIR_A2:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
+ case PPC::DIR_PWR7:
return true;
}
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index c863a6e..87e012e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -73,6 +73,7 @@ protected:
bool HasMFOCRF;
bool Has64BitSupport;
bool Use64BitRegs;
+ bool UseCRBits;
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
@@ -103,7 +104,8 @@ public:
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit);
+ const std::string &FS, bool is64Bit,
+ CodeGenOpt::Level OptLevel);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -126,22 +128,6 @@ public:
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (isPPC64() && isSVR4ABI()) {
- if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
- return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
- else
- return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
- }
-
- return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
- : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
- }
-
/// \brief Reset the features for the PowerPC target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
private:
@@ -162,6 +148,10 @@ public:
/// has64BitSupport() returns true.
bool use64BitRegs() const { return Use64BitRegs; }
+ /// useCRBits - Return true if we should store and manipulate i1 values in
+ /// the individual condition register bits.
+ bool useCRBits() const { return UseCRBits; }
+
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
@@ -188,6 +178,7 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
@@ -200,8 +191,6 @@ public:
/// isDarwin - True if this is any darwin platform.
bool isDarwin() const { return TargetTriple.isMacOSX(); }
- /// isBGP - True if this is a BG/P platform.
- bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9acefe5..e7438f3 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -26,6 +26,10 @@ static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
+static cl::opt<bool>
+VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
+ cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -33,6 +37,41 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -40,15 +79,11 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool is64Bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64Bit),
- DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
+ Subtarget(TT, CPU, FS, is64Bit, OL),
+ DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
-
- // The binutils for the BG/P are too old for CFI.
- if (Subtarget.isBGP())
- setMCUseCFI(false);
initAsmInfo();
}
@@ -95,6 +130,7 @@ public:
virtual bool addPreISel();
virtual bool addILPOpts();
virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
@@ -129,10 +165,26 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
+ if (getPPCSubtarget().hasVSX())
+ addPass(createPPCVSXCopyPass());
+
+ return false;
+}
+
+bool PPCPassConfig::addPreRegAlloc() {
+ if (getPPCSubtarget().hasVSX()) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ &PPCVSXFMAMutateID);
+ }
+
return false;
}
bool PPCPassConfig::addPreSched2() {
+ if (getPPCSubtarget().hasVSX())
+ addPass(createPPCVSXCopyCleanupPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index ec1e606..2903cc1 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetObjectFile.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
using namespace llvm;
@@ -22,16 +22,9 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
InitializeELF(TM.Options.UseInitArray);
}
-const MCSection * PPC64LinuxTargetObjectFile::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
-
- const MCSection *DefaultSection =
- TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
-
- if (DefaultSection != ReadOnlySection)
- return DefaultSection;
-
+const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
// Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
// when we have a constant that contains global relocations. This is
// necessary because of this ABI's handling of pointers to functions in
@@ -46,14 +39,17 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// linker, so we must use DataRelROSection instead of ReadOnlySection.
// For more information, see the description of ELIMINATE_COPY_RELOCS in
// GNU ld.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (Kind.isReadOnly()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (GVar && GVar->isConstant() &&
- (GVar->getInitializer()->getRelocationInfo() ==
- Constant::GlobalRelocations))
- return DataRelROSection;
+ if (GVar && GVar->isConstant() &&
+ (GVar->getInitializer()->getRelocationInfo() ==
+ Constant::GlobalRelocations))
+ Kind = SectionKind::getReadOnlyWithRel();
+ }
- return DefaultSection;
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+ Mang, TM);
}
const MCExpr *PPC64LinuxTargetObjectFile::
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 262c522..3e71bbc 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -20,14 +20,14 @@ namespace llvm {
/// 64-bit PowerPC Linux.
class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
- virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- virtual const MCSection *
- SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const;
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
/// \brief Describe a TLS variable address within debug info.
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const;
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index e876be1..74b5f45 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -15,8 +15,10 @@
namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
+ PPCTargetStreamer(MCStreamer &S);
virtual ~PPCTargetStreamer();
virtual void emitTCEntry(const MCSymbol &S) = 0;
+ virtual void emitMachine(StringRef CPU) = 0;
};
}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 8879630..2f4d5c1 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -19,8 +19,8 @@
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
@@ -32,7 +32,7 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
-class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
const PPCTargetMachine *TM;
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
@@ -52,15 +52,11 @@ public:
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() {
+ virtual void initializePass() override {
pushTTIStack(this);
}
- virtual void finalizePass() {
- popTTIStack();
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -68,7 +64,7 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ virtual void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -76,31 +72,33 @@ public:
/// \name Scalar TTI Implementations
/// @{
- virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ virtual PopcntSupportKind
+ getPopcntSupport(unsigned TyWidth) const override;
+ virtual void getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
- virtual unsigned getNumberOfRegisters(bool Vector) const;
- virtual unsigned getRegisterBitWidth(bool Vector) const;
- virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getNumberOfRegisters(bool Vector) const override;
+ virtual unsigned getRegisterBitWidth(bool Vector) const override;
+ virtual unsigned getMaximumUnrollFactor() const override;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind,
- OperandValueKind) const;
+ OperandValueKind) const override;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const;
+ int Index, Type *SubTp) const override;
virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
+ Type *Src) const override;
virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
+ Type *CondTy) const override;
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
+ unsigned Index) const override;
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace) const override;
/// @}
};
@@ -141,7 +139,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
- return 32;
+ return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
@@ -210,6 +208,14 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ }
+
// Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
@@ -235,14 +241,16 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- // Each load/store unit costs 1.
- unsigned Cost = LT.first * 1;
+ unsigned Cost =
+ TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+
+ // FIXME: Update this for VSX loads/stores that support unaligned access.
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
unsigned SrcBytes = LT.second.getStoreSize();
if (SrcBytes && Alignment && Alignment < SrcBytes)
- Cost *= (SrcBytes/Alignment);
+ Cost += LT.first*(SrcBytes/Alignment-1);
return Cost;
}
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index fdb8a62..c9548c7 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-
-add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index f77d85b..4102346 100644
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCInfo
parent = PowerPC
-required_libraries = MC Support Target
+required_libraries = Support
add_to_library_groups = PowerPC