aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp262
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp28
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp18
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp26
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp57
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp73
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h11
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp64
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--lib/Target/PowerPC/Makefile2
-rw-r--r--lib/Target/PowerPC/PPC.h23
-rw-r--r--lib/Target/PowerPC/PPC.td31
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp440
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp3
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp7
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td37
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp293
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp125
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp60
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h19
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h12
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp329
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1466
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h82
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td27
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td245
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h4
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td70
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp49
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td425
-rw-r--r--lib/Target/PowerPC/PPCInstrSPE.td447
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td110
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp482
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h46
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp22
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp9
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h19
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h5
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp82
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td1
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp140
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h90
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp94
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h42
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp69
60 files changed, 3738 insertions, 2405 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 2f562ca..06bb968 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -213,16 +214,12 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
bool IsPPC64;
bool IsDarwin;
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
bool isDarwin() const { return IsDarwin; }
@@ -244,10 +241,12 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDirectiveTC(unsigned Size, SMLoc L);
bool ParseDirectiveMachine(SMLoc L);
bool ParseDarwinDirectiveMachine(SMLoc L);
+ bool ParseDirectiveAbiVersion(SMLoc L);
+ bool ParseDirectiveLocalEntry(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -263,9 +262,8 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
+ const MCInstrInfo &_MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -294,6 +292,7 @@ struct PPCOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Immediate,
+ ContextImmediate,
Expression,
TLSRegister
} Kind;
@@ -338,6 +337,7 @@ public:
Tok = o.Tok;
break;
case Immediate:
+ case ContextImmediate:
Imm = o.Imm;
break;
case Expression:
@@ -362,6 +362,16 @@ public:
assert(Kind == Immediate && "Invalid access!");
return Imm.Val;
}
+ int64_t getImmS16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ if (Kind == Immediate)
+ return Imm.Val;
+ return static_cast<int16_t>(Imm.Val);
+ }
+ int64_t getImmU16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ return Imm.Val;
+ }
const MCExpr *getExpr() const {
assert(Kind == Expression && "Invalid access!");
@@ -406,22 +416,73 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
- bool isU16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isUInt<16>(getImm())); }
- bool isS16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<16>(getImm())); }
+ bool isU6ImmX2() const { return Kind == Immediate &&
+ isUInt<6>(getImm()) &&
+ (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return Kind == Immediate &&
+ isUInt<7>(getImm()) &&
+ (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return Kind == Immediate &&
+ isUInt<8>(getImm()) &&
+ (getImm() & 7) == 0; }
+ bool isU16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isUInt<16>(getImmU16Context());
+ default:
+ return false;
+ }
+ }
+ bool isS16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<16>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isS16ImmX4() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
- bool isS17Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<17>(getImm())); }
+ bool isS17Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<17>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isTLSReg() const { return Kind == TLSRegister; }
- bool isDirectBr() const { return Kind == Expression ||
- (Kind == Immediate && isInt<26>(getImm()) &&
- (getImm() & 3) == 0); }
+ bool isDirectBr() const {
+ if (Kind == Expression)
+ return true;
+ if (Kind != Immediate)
+ return false;
+ // Operand must be 64-bit aligned, signed 27-bit immediate.
+ if ((getImm() & 3) != 0)
+ return false;
+ if (isInt<26>(getImm()))
+ return true;
+ if (!IsPPC64) {
+ // In 32-bit mode, large 32-bit quantities wrap around.
+ if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm())))
+ return true;
+ }
+ return false;
+ }
bool isCondBr() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
@@ -526,6 +587,36 @@ public:
Inst.addOperand(MCOperand::CreateExpr(getExpr()));
}
+ void addS16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
+ void addU16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
@@ -566,9 +657,9 @@ public:
// explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
- Op->Tok.Data = (const char *)(Op.get() + 1);
+ Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
+ std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
@@ -607,6 +698,16 @@ public:
}
static std::unique_ptr<PPCOperand>
+ CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(ContextImmediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -615,6 +716,12 @@ public:
if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
return CreateTLSReg(SRE, S, E, IsPPC64);
+ if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
+ int64_t Res;
+ if (TE->EvaluateAsConstant(Res))
+ return CreateContextImm(Res, S, E, IsPPC64);
+ }
+
return CreateExpr(Val, S, E, IsPPC64);
}
};
@@ -627,6 +734,7 @@ void PPCOperand::print(raw_ostream &OS) const {
OS << "'" << getToken() << "'";
break;
case Immediate:
+ case ContextImmediate:
OS << getImm();
break;
case Expression:
@@ -638,6 +746,29 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
+static void
+addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
+ if (Op.isImm()) {
+ Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+ return;
+ }
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
+ if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
+ Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+ return;
+ }
+ } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
+ const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
+ BinExpr->getLHS(), Ctx);
+ Inst.addOperand(MCOperand::CreateExpr(NE));
+ return;
+ }
+ }
+ Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+}
+
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
@@ -653,41 +784,37 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
case PPC::SUBI: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDI);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIS: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIS);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIC: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIC);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBICo: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDICo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
@@ -921,7 +1048,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
@@ -939,7 +1066,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "unrecognized instruction mnemonic");
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -995,6 +1122,7 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -1176,6 +1304,7 @@ ParseExpression(const MCExpr *&EVal) {
/// for this to be done at a higher level.
bool PPCAsmParser::
ParseDarwinExpression(const MCExpr *&EVal) {
+ MCAsmParser &Parser = getParser();
PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
switch (getLexer().getKind()) {
default:
@@ -1218,6 +1347,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
@@ -1412,6 +1542,10 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
if (IDVal == ".machine")
return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (IDVal == ".abiversion")
+ return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ if (IDVal == ".localentry")
+ return ParseDirectiveLocalEntry(DirectiveID.getLoc());
} else {
if (IDVal == ".machine")
return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
@@ -1422,6 +1556,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -1446,6 +1581,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveTC
/// ::= .tc [ symbol (, expression)* ]
bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Skip TC symbol, which is only used with XCOFF.
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
@@ -1466,6 +1602,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1500,6 +1637,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
/// ParseDarwinDirectiveMachine (Mach-o platforms)
/// ::= .machine cpu-identifier
bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1534,6 +1672,64 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
return false;
}
+/// ParseDirectiveAbiVersion
+/// ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+ int64_t AbiVersion;
+ if (getParser().parseAbsoluteExpression(AbiVersion)){
+ Error(L, "expected constant expression");
+ return false;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitAbiVersion(AbiVersion);
+
+ return false;
+}
+
+/// ParseDirectiveLocalEntry
+/// ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name)) {
+ Error(L, "expected identifier in directive");
+ return false;
+ }
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(L, "expected expression");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitLocalEntry(Sym, Expr);
+
+ return false;
+}
+
+
+
/// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
@@ -1558,6 +1754,10 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
case MCK_1: ImmVal = 1; break;
case MCK_2: ImmVal = 2; break;
case MCK_3: ImmVal = 3; break;
+ case MCK_4: ImmVal = 4; break;
+ case MCK_5: ImmVal = 5; break;
+ case MCK_6: ImmVal = 6; break;
+ case MCK_7: ImmVal = 7; break;
default: return Match_InvalidOperand;
}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index ea4de63..47a9474 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
@@ -16,7 +15,6 @@ add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
- PPCCodeEmitter.cpp
PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
PPCInstrInfo.cpp
@@ -24,7 +22,6 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCFastISel.cpp
PPCFrameLowering.cpp
- PPCJITInfo.cpp
PPCMCInstLower.cpp
PPCMachineFunctionInfo.cpp
PPCRegisterInfo.cpp
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index c1011ff..ea3e7ea 100644
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCDisassembler
parent = PowerPC
-required_libraries = MC PowerPCDesc PowerPCInfo Support
+required_libraries = MCDisassembler PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a2305a9..5251b60 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,7 +12,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -28,13 +27,10 @@ public:
: MCDisassembler(STI, Ctx) {}
virtual ~PPCDisassembler() {}
- // Override MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -325,23 +321,19 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
#include "PPCGenDisassemblerTables.inc"
DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
// Get the four bytes of the instruction.
- uint8_t Bytes[4];
Size = 4;
- if (Region.readBytes(Address, Size, Bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
// The instruction is big-endian encoded.
- uint32_t Inst = (Bytes[0] << 24) |
- (Bytes[1] << 16) |
- (Bytes[2] << 8) |
- (Bytes[3] << 0);
+ uint32_t Inst =
+ (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 7279b09..670c40a 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -207,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 15 && "Invalid u4imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -260,7 +268,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, O);
- O << (int)MI->getOperand(OpNo).getImm()*4;
+ O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
}
@@ -308,10 +316,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- printBranchOperand(MI, OpNo, O);
+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+ // come at the _end_ of the expression.
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+ O << refExp.getSymbol().getName();
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
+ if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 211a628..b21aa22 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCINSTPRINTER_H
-#define PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
@@ -44,6 +44,7 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 12584be..c54d5e7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -9,7 +9,9 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
@@ -128,6 +130,30 @@ public:
}
}
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ switch ((PPC::Fixups)Fixup.getKind()) {
+ default: break;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ // If the target symbol has a local entry point we must not attempt
+ // to resolve the fixup directly. Emit a relocation and leave
+ // resolution of the final target address to the linker.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
+ break;
+ }
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const override {
// FIXME.
return false;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index cd3b4f4..b817394 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
@@ -23,13 +24,12 @@ namespace {
public:
PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
- virtual ~PPCELFObjectWriter();
protected:
- virtual unsigned getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
};
}
@@ -38,9 +38,6 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
/*HasRelocationAddend*/ true) {}
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
const MCFixup &Fixup) {
const MCExpr *Expr = Fixup.getValue();
@@ -69,10 +66,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
llvm_unreachable("unknown PPCMCExpr kind");
}
-unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const
-{
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
@@ -83,7 +79,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
- Type = ELF::R_PPC_REL24;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_PPC_PLTREL24;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LOCAL:
+ Type = ELF::R_PPC_LOCAL24PC;
+ break;
+ }
break;
case PPC::fixup_ppc_brcond14:
case PPC::fixup_ppc_brcond14abs:
@@ -224,7 +231,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
- Type = ELF::R_PPC64_GOT_TLSGD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSGD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
Type = ELF::R_PPC64_GOT_TLSGD16_LO;
@@ -236,7 +246,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_GOT_TLSGD16_HA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
- Type = ELF::R_PPC64_GOT_TLSLD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSLD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
Type = ELF::R_PPC64_GOT_TLSLD16_LO;
@@ -332,13 +345,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_PPC_TLSGD:
- Type = ELF::R_PPC64_TLSGD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSGD;
+ else
+ Type = ELF::R_PPC_TLSGD;
break;
case MCSymbolRefExpr::VK_PPC_TLSLD:
- Type = ELF::R_PPC64_TLSLD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSLD;
+ else
+ Type = ELF::R_PPC_TLSLD;
break;
case MCSymbolRefExpr::VK_PPC_TLS:
- Type = ELF::R_PPC64_TLS;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLS;
+ else
+ Type = ELF::R_PPC_TLS;
break;
}
break;
@@ -373,10 +395,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
return Type;
}
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return getRelocTypeInner(Target, Fixup, IsPCRel);
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ case ELF::R_PPC_REL24:
+ // If the target symbol has a local entry point, we must keep the
+ // target symbol to preserve that information for the linker.
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(SD) << 2;
+ return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+ }
}
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 68de8c1..ae43e59 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PPC_PPCFIXUPKINDS_H
-#define LLVM_PPC_PPCFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b95a2ac..893aae3 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -42,9 +42,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
UseIntegratedAssembler = true;
}
-void PPCLinuxMCAsmInfo::anchor() { }
+void PPCELFMCAsmInfo::anchor() { }
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
+PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
DollarIsPC = true;
// Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
MinInstAlignment = 4;
// Exceptions handling
@@ -73,6 +72,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
if (T.getOS() == llvm::Triple::FreeBSD ||
(T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 754330b..9f0294d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETASMINFO_H
-#define PPCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -26,10 +26,10 @@ class Triple;
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
+ class PPCELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 435a93f..786b7fe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -66,6 +66,15 @@ public:
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -260,6 +269,54 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
}
+unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 3ac0aca..7204bef 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCFixupKinds.h"
#include "PPCMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -52,40 +53,56 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
}
bool
+PPCMCExpr::EvaluateAsConstant(int64_t &Res) const {
+ MCValue Value;
+
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr))
+ return false;
+
+ if (!Value.isAbsolute())
+ return false;
+
+ Res = EvaluateAsInt64(Value.getConstant());
+ return true;
+}
+
+int64_t
+PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
+ switch (Kind) {
+ case VK_PPC_LO:
+ return Value & 0xffff;
+ case VK_PPC_HI:
+ return (Value >> 16) & 0xffff;
+ case VK_PPC_HA:
+ return ((Value + 0x8000) >> 16) & 0xffff;
+ case VK_PPC_HIGHER:
+ return (Value >> 32) & 0xffff;
+ case VK_PPC_HIGHERA:
+ return ((Value + 0x8000) >> 32) & 0xffff;
+ case VK_PPC_HIGHEST:
+ return (Value >> 48) & 0xffff;
+ case VK_PPC_HIGHESTA:
+ return ((Value + 0x8000) >> 48) & 0xffff;
+ case VK_PPC_None:
+ break;
+ }
+ llvm_unreachable("Invalid kind!");
+}
+
+bool
PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup))
return false;
if (Value.isAbsolute()) {
- int64_t Result = Value.getConstant();
- switch (Kind) {
- default:
- llvm_unreachable("Invalid kind!");
- case VK_PPC_LO:
- Result = Result & 0xffff;
- break;
- case VK_PPC_HI:
- Result = (Result >> 16) & 0xffff;
- break;
- case VK_PPC_HA:
- Result = ((Result + 0x8000) >> 16) & 0xffff;
- break;
- case VK_PPC_HIGHER:
- Result = (Result >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHERA:
- Result = ((Result + 0x8000) >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHEST:
- Result = (Result >> 48) & 0xffff;
- break;
- case VK_PPC_HIGHESTA:
- Result = ((Result + 0x8000) >> 48) & 0xffff;
- break;
- }
+ int64_t Result = EvaluateAsInt64(Value.getConstant());
+ if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
+ (Result >= 0x8000))
+ return false;
Res = MCValue::get(Result);
} else {
if (!Layout)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index bca4085..f0a6bb9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCEXPR_H
-#define PPCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
@@ -34,6 +34,8 @@ private:
const MCExpr *Expr;
bool IsDarwin;
+ int64_t EvaluateAsInt64(int64_t Value) const;
+
explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
bool _IsDarwin)
: Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
@@ -78,7 +80,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
@@ -87,6 +90,8 @@ public:
// There are no TLS PPCMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+ bool EvaluateAsConstant(int64_t &Res) const;
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 7057797..00be8f4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -16,12 +16,16 @@
#include "PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -75,7 +79,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
- MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
+ MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
@@ -125,11 +129,20 @@ public:
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
+ void emitAbiVersion(int AbiVersion) override {
+ OS << "\t.abiversion " << AbiVersion << '\n';
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+ }
};
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitSymbolValue(&S, 8);
@@ -138,6 +151,39 @@ public:
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
+ void emitAbiVersion(int AbiVersion) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_PPC64_ABI;
+ Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+ MCA.setELFHeaderEFlags(Flags);
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
+
+ int64_t Res;
+ if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+ report_fatal_error(".localentry expression must be absolute.");
+
+ unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+ if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+ report_fatal_error(".localentry expression cannot be encoded.");
+
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= Encoded;
+ MCELF::setOther(Data, Other >> 2);
+
+ // For GAS compatibility, unless we already saw a .abiversion directive,
+ // set e_flags to indicate ELFv2 ABI.
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ if ((Flags & ELF::EF_PPC64_ABI) == 0)
+ MCA.setELFHeaderEFlags(Flags | 2);
+ }
};
class PPCTargetMachOStreamer : public PPCTargetStreamer {
@@ -150,25 +196,27 @@ public:
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("Unknown pseudo-op: .abiversion");
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Unknown pseudo-op: .localentry");
+ }
};
}
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
if (Triple(TT).isOSDarwin()) {
MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetMachOStreamer(*S);
return S;
}
- MCStreamer *S =
- createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetELFStreamer(*S);
return S;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 474395b..68f7f7a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCTARGETDESC_H
-#define PPCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index cff27ba..df2f14a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -80,7 +80,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+/// Outline based on PPCELFObjectWriter::GetRelocType().
static unsigned getRelocType(const MCValue &Target,
const MCFixupKind FixupKind, // from
// Fixup.getKind()
@@ -360,7 +360,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 10e328a..6075631 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index c966748..cf516f4 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -13,7 +13,7 @@ TARGET = PPC
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
- PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenAsmWriter.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c42c5be..8fb33df 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_H
-#define LLVM_TARGET_POWERPC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
+#define LLVM_LIB_TARGET_POWERPC_PPC_H
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -26,7 +26,6 @@ namespace llvm {
class PassRegistry;
class FunctionPass;
class ImmutablePass;
- class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
class MCInst;
@@ -41,8 +40,6 @@ namespace llvm {
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
- FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
@@ -60,10 +57,11 @@ namespace llvm {
// PPC Specific MachineOperand flags.
MO_NO_FLAG,
- /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and earlier.
- MO_DARWIN_STUB = 1,
+ /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is
+ /// used for calls and jumps to external functions on Tiger and earlier, and
+ /// for PIC calls on Linux and ELF systems.
+ MO_PLT_OR_STUB = 1,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
@@ -95,7 +93,12 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4
+ MO_TLS = 8 << 4,
+
+ // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
+ // call sequences.
+ MO_TLSLD = 9 << 4,
+ MO_TLSGD = 10 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index a9842b2..46d56a4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -56,6 +56,8 @@ def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
"Use condition-register bits individually">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
+def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
+ "Enable SPE instructions">;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -88,11 +90,23 @@ def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
+ "Has only the msync instruction instead of sync",
+ [FeatureBookE]>;
+def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true",
+ "Enable E500/E500mc instructions">;
+def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
+ "Enable PPC 4xx instructions">;
+def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
+ "Enable PPC 6xx instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
+ "Enable POWER8 vector instructions",
+ [FeatureVSX, FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -105,7 +119,16 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// CMPB p6, p6x, p7 cmpb
// DFP p6, p6x, p7 decimal floating-point instructions
// POPCNTB p5 through p7 popcntb and related instructions
-// VSX p7 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// ABI Selection //
+//===----------------------------------------------------------------------===//
+
+def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
+ "Use the ELFv1 ABI">;
+
+def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
+ "Use the ELFv2 ABI">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -178,10 +201,12 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fd044d9..5648873 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMachineFunctionInfo.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCSubtarget.h"
@@ -27,10 +28,12 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -100,9 +103,11 @@ namespace {
}
bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
void EmitFunctionEntryLabel() override;
+ void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
};
@@ -142,7 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -270,6 +275,18 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
return false;
}
+ case 'U': // Print 'u' for update form.
+ case 'X': // Print 'x' for indexed form.
+ {
+ // FIXME: Currently for PowerPC memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
}
}
@@ -285,7 +302,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid name clash check if the name already exists.
@@ -306,12 +323,35 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+ const Module *M = MF->getFunction()->getParent();
+ PICLevel::Level PL = M->getPICLevel();
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
+ case PPC::MoveGOTtoLR: {
+ // Transform %LR = MoveGOTtoLR
+ // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
+ // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
+ // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
+ // blrl
+ // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
+ MCSymbol *GOTSymbol =
+ OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
+ MCSymbolRefExpr::VK_PPC_LOCAL,
+ OutContext),
+ MCConstantExpr::Create(4, OutContext),
+ OutContext);
+
+ // Emit the 'bl'.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ return;
+ }
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
@@ -330,11 +370,85 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
+ case PPC::UpdateGBR: {
+ // Transform %Rd = UpdateGBR(%Rt, %Ri)
+ // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
+ // add %Rd, %Rt, %Ri
+ // Get the offset from the GOT Base Register to the GOT
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ MCSymbol *PICOffset =
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ TmpInst.setOpcode(PPC::LWZ);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCOperand TR = TmpInst.getOperand(1);
+ const MCOperand PICR = TmpInst.getOperand(0);
+
+ // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
+ TmpInst.getOperand(1) =
+ MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ TmpInst.getOperand(0) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(OutStreamer, TmpInst);
+
+ TmpInst.setOpcode(PPC::ADD4);
+ TmpInst.getOperand(0) = PICR;
+ TmpInst.getOperand(1) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtoc: {
+ // Transform %R3 = LWZtoc <ga:@min1>, %R2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LWZ, and the global address operand to be a
+ // reference to the GOT entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+
+ if (PL == PICLevel::Small) {
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ } else {
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ OutContext);
+ Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ }
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
case PPC::LDtocJTI:
case PPC::LDtocCPT:
+ case PPC::LDtocBA:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
@@ -342,7 +456,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
// Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
@@ -350,6 +464,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -363,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDIS8. If the global address is external, has
// common linkage, is a non-local function address, or is a jump table
@@ -371,7 +487,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
@@ -391,9 +508,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
- MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+ MO.isJTI() || MO.isBlockAddress() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -405,19 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::LDtocL: {
// Transform %Xd = LDtocL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD. If the global address is external, has
// common linkage, or is a jump table address, then reference the
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
- assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for LDtocL!");
MCSymbol *MOSymbol = nullptr;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isBlockAddress()) {
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
if (TM.getCodeModel() == CodeModel::Large)
@@ -442,7 +567,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItocL: {
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise reference the
@@ -493,7 +618,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD.
TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
@@ -508,6 +633,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::PPC32PICGOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+ MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::Create(GOTRef, OutContext),
+ OutContext);
+ OutStreamer.EmitLabel(GOTRef);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(NextInstr);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
case PPC::PPC32GOT: {
MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
@@ -541,40 +694,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsGD));
return;
}
- case PPC::ADDItlsgdL: {
+ case PPC::ADDItlsgdL:
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsgdL32: {
+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsGD));
- return;
- }
- case PPC::GETtlsADDR: {
- // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
return;
}
case PPC::ADDIStlsldHA: {
@@ -593,72 +731,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsLD));
return;
}
- case PPC::ADDItlsldL: {
+ case PPC::ADDItlsldL:
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsldL32: {
+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsld
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsLD));
- return;
- }
- case PPC::GETtlsldADDR: {
- // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
return;
}
- case PPC::ADDISdtprelHA: {
+ case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDISdtprelHA32: {
+ // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+ // Into: %Rd = ADDIS %R3, sym@dtprel@ha
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X3)
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
return;
}
- case PPC::ADDIdtprelL: {
+ case PPC::ADDIdtprelL:
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@dtprel@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDIdtprelL32: {
+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@dtprel@l
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
return;
}
case PPC::MFOCRF:
@@ -713,14 +842,77 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
EmitToStreamer(OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget.isELFv2ABI()) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitAbiVersion(2);
+ }
+
+ if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ if (M.getPICLevel() == PICLevel::Small)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly()));
+
+ MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
+ MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+
+ OutStreamer.EmitLabel(CurrentPos);
+
+ // The GOT pointer points to the middle of the GOT, in order to reference the
+ // entire 64kB range. 0x8000 is the midpoint.
+ const MCExpr *tocExpr =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
+ MCConstantExpr::Create(0x8000, OutContext),
+ OutContext);
+
+ OutStreamer.EmitAssignment(TOCSym, tocExpr);
+
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
- if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ // linux/ppc32 - Normal entry label.
+ if (!Subtarget.isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
+ MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
-
+
+ if (!Subtarget.isPPC64()) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ if (PPCFI->usesPICBase()) {
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ OutStreamer.EmitLabel(RelocSymbol);
+
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ OutContext),
+ MCSymbolRefExpr::Create(PICBase, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ return;
+ } else
+ return AsmPrinter::EmitFunctionEntryLabel();
+ }
+
+ // ELFv2 ABI - Normal entry label.
+ if (Subtarget.isELFv2ABI())
+ return AsmPrinter::EmitFunctionEntryLabel();
+
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@@ -752,15 +944,22 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
- if (isPPC64 && !TOC.empty()) {
- const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ if (!TOC.empty()) {
+ const MCSectionELF *Section;
+
+ if (isPPC64)
+ Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ else
+ Section = OutStreamer.getContext().getELFSection(".got2",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
@@ -768,8 +967,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
- MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- TS.emitTCEntry(*S);
+ MCSymbol *S = I->first;
+ if (isPPC64)
+ TS.emitTCEntry(*S);
+ else
+ OutStreamer.EmitSymbolValue(S, 4);
}
}
@@ -795,6 +997,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+ // In the ELFv2 ABI, in functions that use the TOC register, we need to
+ // provide two entry points. The ABI guarantees that when calling the
+ // local entry point, r2 is set up by the caller to contain the TOC base
+ // for this function, and when calling the global entry point, r12 is set
+ // up by the caller to hold the address of the global entry point. We
+ // thus emit a prefix sequence along the following lines:
+ //
+ // func:
+ // # global entry point
+ // addis r2,r12,(.TOC.-func)@ha
+ // addi r2,r2,(.TOC.-func)@l
+ // .localentry func, .-func
+ // # local entry point, followed by function body
+ //
+ // This ensures we have r2 set up correctly while executing the function
+ // body, no matter which entry point is called.
+ if (Subtarget.isELFv2ABI()
+ // Only do all that if the function uses r2 in the first place.
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+ MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(GlobalEntryLabel);
+ const MCSymbolRefExpr *GlobalEntryLabelExp =
+ MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+ MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+
+ MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(LocalEntryLabel);
+ const MCSymbolRefExpr *LocalEntryLabelExp =
+ MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+ const MCExpr *LocalOffsetExp =
+ MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+ GlobalEntryLabelExp, OutContext);
+
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+ }
+}
+
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
@@ -886,7 +1150,8 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
bool isDarwin = Subtarget.isDarwin();
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -1022,7 +1287,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ee90671..41594be 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
@@ -64,7 +65,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index ec1e34d..5f3b176 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -214,7 +214,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
@@ -384,10 +384,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
}
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index d48164d..cf8fee4 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -14,9 +14,15 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
class CCIfNotSubtarget<string F, CCAction A>
- : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("!static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
@@ -31,13 +37,18 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ // Floating point types returned as "direct" go into F1 .. F8; note that
+ // only the ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
-
- // Vector types are always returned in V2.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
- CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+ // Vector types returned as "direct" go into V2 .. V9; note that only the
+ // ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
@@ -69,10 +80,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i32], CCPromoteToType<i64>>,
CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
- CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
deleted file mode 100644
index 0875523..0000000
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
-// JIT-compile bitcode to native PowerPC.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-namespace {
- class PPCCodeEmitter : public MachineFunctionPass {
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- MachineModuleInfo *MMI;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
- /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
- /// its address in the function into this pointer.
- void *MovePCtoLROffset;
- public:
-
- PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
-
- MachineRelocation GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const;
-
- /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- const char *getPassName() const override {
- return "PowerPC Machine Code Emitter";
- }
-
- /// runOnMachineFunction - emits the given MachineFunction to memory
- ///
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- /// emitBasicBlock - emits the given MachineBasicBlock to memory
- ///
- void emitBasicBlock(MachineBasicBlock &MBB);
- };
-}
-
-char PPCCodeEmitter::ID = 0;
-
-/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
-/// to the specified MCE object.
-FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new PPCCodeEmitter(TM, JCE);
-}
-
-bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
- "JIT relocation model must be set to static or default!");
-
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
- do {
- MovePCtoLROffset = nullptr;
- MCE.startFunction(MF);
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- emitBasicBlock(*BB);
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
- MCE.StartMachineBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
- const MachineInstr &MI = *I;
- MCE.processDebugLoc(MI.getDebugLoc(), true);
- switch (MI.getOpcode()) {
- default:
- MCE.emitWordBE(getBinaryCodeForInstr(MI));
- break;
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break; // pseudo opcode, no side effects
- case PPC::MovePCtoLR:
- case PPC::MovePCtoLR8:
- assert(TM.getRelocationModel() == Reloc::PIC_);
- MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
- MCE.emitWordBE(0x48000005); // bl 1
- break;
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
- }
-}
-
-unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
- MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
- (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
-}
-
-MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const {
- // If in PIC mode, we need to encode the negated address of the
- // 'movepctolr' into the unrelocated field. After relocation, we'll have
- // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
- // field, we get &gv. This doesn't happen for branch relocations, which are
- // always implicitly pc relative.
- intptr_t Cst = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
- Cst = -(intptr_t)MovePCtoLROffset - 4;
- }
-
- if (MO.isGlobal())
- return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
- const_cast<GlobalValue *>(MO.getGlobal()),
- Cst, isa<Function>(MO.getGlobal()));
- if (MO.isSymbol())
- return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- RelocID, MO.getSymbolName(), Cst);
- if (MO.isCPI())
- return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-
- if (MO.isMBB())
- return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- RelocID, MO.getMBB());
-
- assert(MO.isJTI());
- return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-}
-
-unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- unsigned RelocID;
- switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
- default: llvm_unreachable("Unsupported target operand flags!");
- case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
- case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
- }
-
- MCE.addRelocation(GetRelocation(MO, RelocID));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memri, which has the low 16-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
-
- // Add a fixup for the displacement field.
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
- return RegBits;
-}
-
-unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memrix, which has the low 14-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
- return RegBits;
-}
-
-
-unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
-
- if (MO.isReg()) {
- // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
- // The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
- MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
- MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- }
-
- assert(MO.isImm() &&
- "Relocation required in an instruction that we cannot encode!");
- return MO.getImm();
-}
-
-#include "PPCGenCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 92a0ec1..1149354 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -39,7 +39,7 @@
//===----------------------------------------------------------------------===//
//
// TBD:
-// FastLowerArguments: Handle simple cases.
+// fastLowerArguments: Handle simple cases.
// PPCMaterializeGV: Handle TLS.
// SelectCall: Handle function pointers.
// SelectCall: Handle multi-register return values.
@@ -92,30 +92,29 @@ class PPCFastISel final : public FastISel {
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
- : FastISel(FuncInfo, LibInfo),
- TM(FuncInfo.MF->getTarget()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
- Context(&FuncInfo.Fn->getContext()) { }
+ : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
private:
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
- unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
- unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ bool fastLowerArguments() override;
+ unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
- unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
@@ -153,7 +152,7 @@ class PPCFastISel final : public FastISel {
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+ unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -560,7 +559,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -707,7 +706,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -715,7 +714,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DbgLoc);
+ fastEmitBranch(Target, DbgLoc);
return true;
}
@@ -838,7 +837,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
return false;
// No code is generated for a FP extend.
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -860,12 +859,12 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
@@ -898,10 +897,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (SrcVT == MVT::i32) {
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
} else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
}
}
@@ -979,13 +978,13 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
@@ -1080,7 +1079,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IntReg == 0)
return false;
- UpdateValueMap(I, IntReg);
+ updateValueMap(I, IntReg);
return true;
}
@@ -1169,7 +1168,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
ResultReg)
.addReg(SrcReg1)
.addImm(Imm);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -1185,7 +1184,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1200,10 +1199,12 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
CCInfo.AllocateStack(LinkageSize, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1232,6 +1233,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
NumBytes = std::max(NumBytes, LinkageSize + 64);
// Issue CALLSEQ_START.
@@ -1318,7 +1320,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
@@ -1364,7 +1366,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
assert(ResultReg && "ResultReg unset!");
UsedRegs.push_back(SourcePhysReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
}
@@ -1408,7 +1410,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
@@ -1498,6 +1500,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (PPCSubTarget->isELFv2ABI())
+ MIB.addReg(PPC::X2, RegState::Implicit);
+
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
@@ -1531,7 +1537,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
@@ -1541,13 +1547,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Special case for returning a constant integer of any size.
// Materialize the constant as an i64 and copy it to the return
- // register. This avoids an unnecessary extend or truncate.
+ // register. We still need to worry about properly extending the sign. E.g:
+ // If the constant has only one bit, it means it is a boolean. Therefore
+ // we can't use PPCMaterializeInt because it extends the sign which will
+ // cause negations of the returned value to be incorrect as they are
+ // implemented as the flip of the least significant bit.
if (isa<ConstantInt>(*RV)) {
const Constant *C = cast<Constant>(RV);
- unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
- unsigned RetReg = ValLocs[0].getLocReg();
+
+ CCValAssign &VA = ValLocs[0];
+
+ unsigned RetReg = VA.getLocReg();
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
+ VA.getLocInfo() == CCValAssign::SExt);
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
RetRegs.push_back(RetReg);
} else {
@@ -1714,7 +1730,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
SrcReg = ResultReg;
}
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -1753,13 +1769,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
-bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
@@ -2007,7 +2023,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
+ bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
@@ -2031,7 +2048,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm(CI->getSExtValue());
+ .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
return ImmReg;
}
@@ -2048,7 +2065,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -2067,7 +2084,7 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -2167,7 +2184,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
-bool PPCFastISel::FastLowerArguments() {
+bool PPCFastISel::fastLowerArguments() {
// Defer to normal argument lowering for now. It's reasonably
// efficient. Consider doing something like ARM to handle the
// case where all args fit in registers, no varargs, no float
@@ -2177,7 +2194,7 @@ bool PPCFastISel::FastLowerArguments() {
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
-unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
@@ -2214,7 +2231,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
-unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
@@ -2227,27 +2244,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
Op0, Op0IsKill, Imm);
}
// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
-unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+ return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}
// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
-unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
@@ -2255,7 +2272,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
Op1, Op1IsKill);
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65e9cf2..dc87a6c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -254,7 +254,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -372,7 +372,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -400,7 +400,8 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Maximum call frame needs to be at least big enough for linkage area.
unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+ Subtarget.isDarwinABI(),
+ Subtarget.isELFv2ABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -459,9 +460,9 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg;
+ unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -497,21 +498,23 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
assert((isDarwinABI || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
@@ -546,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
@@ -602,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -623,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
"Prologue CR saving supported only in 64-bit mode");
if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it could be more
+ // efficient to use mfocrf to selectively save just those fields.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
@@ -791,8 +799,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+ // the whole CR word. In the ELFv2 ABI, every CR that was
+ // actually saved gets its own CFI record.
+ unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -812,9 +824,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -839,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -849,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -890,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -1054,7 +1069,7 @@ void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1067,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1081,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int BPSI = FI->getBasePointerSaveIndex();
if (!BPSI && RegInfo->hasBasePointer(MF)) {
- int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
// Allocate the frame index for the base pointer save area.
BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
@@ -1185,7 +1201,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -1220,7 +1236,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1368,7 +1384,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -1430,7 +1446,7 @@ restoreCRs(bool isPPC64, bool is31,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
unsigned RestoreOp, MoveReg;
@@ -1463,7 +1479,7 @@ void PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
@@ -1513,7 +1529,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 7a226f7..c482588 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_FRAMEINFO_H
-#define POWERPC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
@@ -76,8 +76,8 @@ public:
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- static unsigned getTOCSaveOffset(void) {
- return 40;
+ static unsigned getTOCSaveOffset(bool isELFv2ABI) {
+ return isELFv2ABI ? 24 : 40;
}
/// getFramePointerSaveOffset - Return the previous frame offset to save the
@@ -97,19 +97,22 @@ public:
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ static unsigned getBasePointerSaveOffset(bool isPPC64,
+ bool isDarwinABI,
+ bool isPIC) {
if (isDarwinABI)
return isPPC64 ? -16U : -8U;
// SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -16U : -8U;
+ return isPPC64 ? -16U : isPIC ? -12U : -8U;
}
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
+ bool isELFv2ABI) {
if (isDarwinABI || isPPC64)
- return 6 * (isPPC64 ? 8 : 4);
+ return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
// SVR4 ABI:
return 8;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 23f76c1..4b50214 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCHAZRECS_H
-#define PPCHAZRECS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
public:
PPCHazardRecognizer970(const ScheduleDAG &DAG);
- virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
- virtual void EmitInstruction(SUnit *SU) override;
- virtual void AdvanceCycle() override;
- virtual void Reset() override;
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void Reset() override;
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4881b3f..49ba58b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -56,16 +58,16 @@ namespace {
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getTargetLowering();
+ PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
PPCSubTarget = TM.getSubtargetImpl();
SelectionDAGISel::runOnMachineFunction(MF);
@@ -232,7 +234,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -268,16 +270,34 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ const Module *M = MF->getFunction()->getParent();
DebugLoc dl;
if (PPCLowering->getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ if (PPCSubTarget->isTargetELF()) {
+ GlobalBaseReg = PPC::R30;
+ if (M->getPICLevel() == PICLevel::Small) {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
+ } else {
+ GlobalBaseReg =
+ RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ }
} else {
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
@@ -650,94 +670,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
- bool HasVSX) {
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETUEQ:
- case ISD::SETNE:
- case ISD::SETUNE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPEQUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPEQUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPEQUW;
- // v4f32 != v4f32 could be translate to unordered not equal
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETLT:
- case ISD::SETGT:
- case ISD::SETLE:
- case ISD::SETGE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTSB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTSH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTSW;
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETULT:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTUW;
- break;
- case ISD::SETOEQ:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETOLT:
- case ISD::SETOGT:
- case ISD::SETOLE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETOGE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGEDP;
- break;
- default:
- break;
- }
- llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
- switch (VecVT) {
- case MVT::v16i8:
- return PPC::VCMPEQUB;
- case MVT::v8i16:
- return PPC::VCMPEQUH;
- case MVT::v4i32:
- return PPC::VCMPEQUW;
- case MVT::v4f32:
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- case MVT::v2f64:
- return PPC::XVCMPEQDP;
- default:
- llvm_unreachable("Invalid integer vector compare condition");
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
}
}
@@ -829,60 +860,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
- MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETNE:
- case ISD::SETONE:
- case ISD::SETUNE: {
- SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
- PPC::VNOR,
- VecVT, VCmp, VCmp);
- }
- case ISD::SETLT:
- case ISD::SETOLT:
- case ISD::SETULT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
- case ISD::SETGT:
- case ISD::SETOGT:
- case ISD::SETUGT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETGE:
- case ISD::SETOGE:
- case ISD::SETUGE: {
- // Small optimization: Altivec provides a 'Vector Compare Greater Than
- // or Equal To' instruction (vcmpgefp), so in this case there is no
- // need for extra logic for the equal compare.
- if (VecVT.getSimpleVT().isFloatingPoint()) {
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- } else {
- SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpGT, VCmpEQ);
- }
- }
- case ISD::SETLE:
- case ISD::SETOLE:
- case ISD::SETULE: {
- SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpLE, VCmpEQ);
- }
- default:
- llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
}
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
}
if (PPCSubTarget->useCRBits())
@@ -924,6 +915,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
+ // In case any misguided DAG-level optimizations form an ADD with a
+ // TargetConstant operand, crash here instead of miscompiling (by selecting
+ // an r+r add instead of some kind of r+i add).
+ if (N->getOpcode() == ISD::ADD &&
+ N->getOperand(1).getOpcode() == ISD::TargetConstant)
+ llvm_unreachable("Invalid ADD with TargetConstant operand");
+
switch (N->getOpcode()) {
default: break;
@@ -1331,7 +1329,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::f32)
SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
- SelectCCOp = PPC::SELECT_CC_F8;
+ if (PPCSubTarget->hasVSX())
+ SelectCCOp = PPC::SELECT_CC_VSFRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else if (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)
+ SelectCCOp = PPC::SELECT_CC_VSRC;
else
SelectCCOp = PPC::SELECT_CC_VRRC;
@@ -1445,11 +1449,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
- assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI");
+ assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
+ "Only supported for 64-bit ABI and 32-bit SVR4");
+ if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+ SDValue GA = N->getOperand(0);
+ return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ N->getOperand(1));
+ }
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
- // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
CodeModel::Model CModel = TM.getCodeModel();
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
@@ -1466,7 +1476,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
- if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+ CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
@@ -1483,6 +1494,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::PPC32_PICGOT: {
+ // Generate a PIC-safe GOT reference.
+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ }
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
@@ -1683,7 +1700,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_VRRC: {
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC: {
SDValue Op = MachineNode->getOperand(0);
if (Op.isMachineOpcode()) {
if (Op.getMachineOpcode() == PPC::CRSET)
@@ -1989,6 +2008,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC:
if (Op1Set)
ResNode = MachineNode->getOperand(1).getNode();
else if (Op1Unset)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bc057bf..e93bdaf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -39,6 +39,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+// FIXME: Remove this once soft-float is supported.
+static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
+cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
+
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -51,19 +55,10 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- // If it isn't a Mach-O file then it's going to be a linux ELF
- // object file.
- if (TT.isOSDarwin())
- return new TargetLoweringObjectFileMachO();
-
- return new PPC64LinuxTargetObjectFile();
-}
-
-PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
+ : TargetLowering(TM),
Subtarget(*TM.getSubtargetImpl()) {
- setPow2DivIsCheap();
+ setPow2SDivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
@@ -453,6 +448,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
@@ -526,11 +523,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
@@ -561,11 +553,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
@@ -617,15 +604,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ if (!isPPC64) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ if (!isPPC64) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
@@ -685,11 +679,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget.isJITCodeModel())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- setSupportJumpTables(false);
-
setInsertFencesForAtomic(true);
if (Subtarget.enableMachineScheduler())
@@ -782,6 +771,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
+ case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
@@ -811,10 +802,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
- case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
- case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
@@ -828,6 +817,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
+bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
@@ -853,14 +847,27 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
- if (!isUnary) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+j))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
@@ -871,27 +878,34 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- unsigned j, k;
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- j = 0;
- k = 1;
- } else {
- j = 2;
- k = 3;
- }
- if (!isUnary) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+k))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+k))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
@@ -919,38 +933,63 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- if (!isUnary)
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ else
+ return false;
} else {
- if (!isUnary)
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ else
+ return false;
}
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- if (!isUnary)
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ else
+ return false;
} else {
- if (!isUnary)
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ else
+ return false;
}
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
@@ -968,38 +1007,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-
- ShiftAmt += i;
-
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
- return -1;
- }
-
- } else { // Big Endian
+ ShiftAmt -= i;
+ bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
+ isLittleEndian();
+
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else if (ShuffleKind == 1) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ } else
+ return -1;
- ShiftAmt -= i;
+ if (ShuffleKind == 2 && isLE)
+ ShiftAmt = 16 - ShiftAmt;
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
- return -1;
- }
- }
return ShiftAmt;
}
@@ -1055,7 +1082,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1331,7 +1358,13 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- Base = N.getOperand(0);
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
@@ -1491,10 +1524,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
- // Don't use the pic base if not in PIC relocation model. Or if we are on a
- // non-darwin platform. We don't support PIC on other platforms yet.
- bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<PPCSubtarget>().isDarwin();
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
if (isPIC) {
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
@@ -1550,6 +1582,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(CP);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
SDValue CPILo =
@@ -1571,6 +1612,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(GA);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
@@ -1579,8 +1629,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
+ BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+ const BlockAddress *BA = BASDN->getBlockAddress();
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual BlockAddress is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
@@ -1589,6 +1647,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
+// Generate a call to __tls_get_addr for the given GOT entry Op.
+std::pair<SDValue,SDValue>
+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const {
+
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Op;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, IntPtrTy,
+ DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
+ std::move(Args), 0);
+
+ return LowerCallTo(CLI);
+}
+
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@@ -1601,6 +1680,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = Subtarget.isPPC64();
+ const Module *M = DAG.getMachineFunction().getFunction()->getParent();
+ PICLevel::Level picLevel = M->getPICLevel();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1632,50 +1713,46 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSGD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLS_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
- return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ return CallResult.first;
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSLD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLSLD_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ SDValue TLSAddr = CallResult.first;
+ SDValue Chain = CallResult.second;
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, ParmReg, TGA);
+ Chain, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@@ -1700,6 +1777,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ }
+
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
@@ -1794,7 +1879,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// gpr_index
SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
VAListPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = GprIndex.getValue(1);
if (VT == MVT::i64) {
@@ -1817,7 +1902,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
FprPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
@@ -2127,14 +2212,19 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
- ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
-static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
unsigned Align = PtrByteSize;
@@ -2156,14 +2246,78 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
}
}
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
return Align;
}
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+ // (this check catches arguments passed partially in memory)
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
+
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const TargetMachine &Target,
unsigned NumBytes) {
- unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign =
+ Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
@@ -2240,11 +2394,11 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2315,7 +2469,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2348,7 +2502,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
- const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ if (DisablePPCFloatInVariadic)
+ NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
NumGPArgRegs));
@@ -2357,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
- NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+ NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
@@ -2399,7 +2555,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2437,6 +2593,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2448,8 +2605,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
- unsigned ArgOffset = LinkageSize;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2471,12 +2628,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs))
+ HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -2484,6 +2658,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
@@ -2492,7 +2667,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
/* Respect alignment of argument on the stack. */
unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
@@ -2520,15 +2695,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- // All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize && !isLittleEndian)
- CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
- // The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize < 8) {
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2537,18 +2728,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
- // slot. The address of the parameter was already calculated
- // above (InVals.push_back(FIN)) to be the right-justified
- // offset within the slot. For this store, we need a new
- // frame index that points at the beginning of the slot.
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ // slot.
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg),
false, false, 0);
@@ -2562,27 +2748,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - j;
+ if (GPR_idx == Num_GPR_Regs)
break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
}
+ ArgOffset += ArgSize;
continue;
}
@@ -2591,6 +2779,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
@@ -2608,6 +2799,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
@@ -2620,12 +2814,32 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs) {
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
needsLoad = true;
- ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
case MVT::v4f32:
case MVT::v4i32:
@@ -2633,6 +2847,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
@@ -2662,7 +2879,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea;
- MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
@@ -2723,7 +2943,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2849,7 +3070,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3336,6 +3557,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3352,42 +3574,41 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
}
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
- // Use indirect calls for ALL functions calls in JIT mode, since the
- // far-call stubs may be outside relocation limits for a BL instruction.
- if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
- unsigned OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (Subtarget.getTargetTriple().isMacOSX() &&
- Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
- (G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
- }
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(),
- 0, OpFlags);
- needIndirectCall = false;
+ unsigned OpFlags = 0;
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(), 0, OpFlags);
+ needIndirectCall = false;
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (Subtarget.getTargetTriple().isMacOSX() &&
- Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
@@ -3400,7 +3621,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64) {
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -3480,7 +3701,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -3491,6 +3712,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
+
+ // If this is a call to __tls_get_addr, find the symbol whose address
+ // is to be taken and add it to the list. This will be used to
+ // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
+ // We find the symbol by walking the chain to the CopyFromReg, walking
+ // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
+ // pulling the symbol from that node.
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
+ assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
+ SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
+ SDValue TGTAddr = AddI->getOperand(1);
+ assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
+ "Didn't find target global TLS address where we expected one");
+ Ops.push_back(TGTAddr);
+ CallOpc = PPCISD::CALL_TLS;
+ }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
@@ -3502,6 +3740,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (Callee.getNode() && isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
return CallOpc;
}
@@ -3522,8 +3764,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
@@ -3571,6 +3813,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
@@ -3589,7 +3833,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3636,7 +3881,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- }
+ } else if (CallOpc == PPCISD::CALL_TLS)
+ // For 64-bit SVR4, TLS calls are always non-local.
+ CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -3646,7 +3893,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
@@ -3735,11 +3982,12 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ PtrByteSize);
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -3776,7 +4024,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -3948,6 +4196,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -3966,21 +4215,27 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with at least 48 bytes, which
- // is reserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+ // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
/* Respect alignment of argument on the stack. */
- unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
NumBytes = ((NumBytes + Align - 1) / Align) * Align;
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
}
unsigned NumBytesActuallyUsed = NumBytes;
@@ -3990,6 +4245,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
// Tail call needs the stack to be aligned.
@@ -4056,10 +4312,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
/* Respect alignment of argument on the stack. */
unsigned Align =
- CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
/* Compute GPR index associated with argument offset. */
@@ -4103,7 +4361,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
@@ -4199,6 +4457,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
@@ -4209,39 +4470,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
break;
case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+ // then the parameter save area. For now, put all arguments to vararg
+ // routines always in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
- // A single float or an aggregate containing only a single float
- // must be passed right-justified in the stack doubleword, and
- // in the GPR, if one is available.
- SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
- !isLittleEndian) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
- StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- } else
- StoreOff = PtrOff;
-
- SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
- MachinePointerInfo(), false, false, 0);
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false,
- false, 0);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
- }
- }
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs) {
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+ // together the with subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4250,14 +4542,25 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
+ }
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+
// For a varargs call, named arguments go into VRs or on the stack as
// usual; unnamed arguments always go to the stack or the corresponding
// GPRs when within range. For now, we always put the value in both
@@ -4328,11 +4631,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4383,7 +4691,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
@@ -4522,7 +4831,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -4751,8 +5060,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_PPC);
}
@@ -4764,8 +5072,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
SDValue Flag;
@@ -5773,15 +6081,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
- PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
return Op;
}
}
@@ -5789,15 +6097,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
- PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -6252,11 +6561,44 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
// Other Lowering Code
//===----------------------------------------------------------------------===//
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *Func = Intrinsic::getDeclaration(M, Id);
+ return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence is taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (Ord == SequentiallyConsistent)
+ return callIntrinsic(Builder, Intrinsic::ppc_sync);
+ else if (isAtLeastRelease(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ else
+ return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (IsLoad && isAtLeastAcquire(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ // FIXME: this is too conservative, a dependent branch + isync is enough.
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ else
+ return nullptr;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -6318,7 +6660,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
bool is8bit, // operation
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -6446,7 +6789,8 @@ llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6545,7 +6889,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
- static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6594,7 +6938,8 @@ MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6613,7 +6958,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
+ (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+ PPC::R29 : PPC::R30);
MachineInstrBuilder MIB;
@@ -6703,7 +7051,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
@@ -6726,7 +7075,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
@@ -6735,11 +7085,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -6770,7 +7124,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
} else {
@@ -7131,151 +7487,54 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPOne =
- DAG.getConstantFP(1.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPOne, FPOne, FPOne, FPOne);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
-
- return Est;
+ ++RefinementSteps;
+ UseOneConstNR = true;
+ return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
-
return SDValue();
}
-SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal sqrt, we need to find the zero of the function:
- // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
- // =>
- // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
- // As a result, we precompute A/2 prior to the iteration loop.
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPThreeHalves =
- DAG.getConstantFP(1.5, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPThreeHalves, FPThreeHalves,
- FPThreeHalves, FPThreeHalves);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
- // this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
-
- return Est;
+ ++RefinementSteps;
+ return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
-
return SDValue();
}
-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
- EVT VT = LS->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LS->getBasePtr();
SDValue BaseLoc = Base->getBasePtr();
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7306,11 +7565,77 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return false;
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+ EVT VT = LS->getMemoryVT();
+ SDValue Loc = LS->getBasePtr();
+ return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+ }
+
+ return false;
+}
+
// Return true is there is a nearyby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking though
-// token factors and other loads (but nothing else). As a result, a true
-// results indicates that it is safe to create a new consecutive load adjacent
-// to the load provided.
+// token factors and other loads (but nothing else). As a result, a true result
+// indicates that it is safe to create a new consecutive load adjacent to the
+// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
SDValue Chain = LD->getChain();
EVT VT = LD->getMemoryVT();
@@ -7324,10 +7649,10 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
// nodes just above the top-level loads and token factors.
while (!Queue.empty()) {
SDNode *ChainNext = Queue.pop_back_val();
- if (!Visited.insert(ChainNext))
+ if (!Visited.insert(ChainNext).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
@@ -7355,17 +7680,17 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
- if (!Visited.insert(LoadRoot))
+ if (!Visited.insert(LoadRoot).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
for (SDNode::use_iterator UI = LoadRoot->use_begin(),
UE = LoadRoot->use_end(); UI != UE; ++UI)
- if (((isa<LoadSDNode>(*UI) &&
- cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ if (((isa<MemSDNode>(*UI) &&
+ cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
Queue.push_back(*UI);
}
@@ -7485,7 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -7699,7 +8024,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -7936,92 +8261,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC:
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
- case ISD::FDIV: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV);
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV,
- N->getOperand(1).getOperand(1));
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- }
-
- SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
-
- }
- break;
- case ISD::FSQRT: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
- // reciprocal sqrt.
- SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
-
- EVT VT = RV.getValueType();
-
- SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
- Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
- }
-
- SDValue ZeroCmp =
- DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- DCI.AddToWorklist(ZeroCmp.getNode());
- DCI.AddToWorklist(RV.getNode());
-
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
- ZeroCmp, Zero, RV);
- return RV;
- }
- }
-
- }
- break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -8112,6 +8351,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ // P8 and later hardware should just use LOAD.
+ !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
@@ -8149,17 +8390,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Intrinsic::ppc_altivec_lvsl);
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
- // Refine the alignment of the original load (a "new" load created here
- // which was identical to the first except for the alignment would be
- // merged with the existing node regardless).
+ // Create the new MMO for the new base load. It is like the original MMO,
+ // but represents an area in memory almost twice the vector size centered
+ // on the original address. If the address is unaligned, we might start
+ // reading up to (sizeof(vector)-1) bytes below the address of the
+ // original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(LD->getPointerInfo(),
- LD->getMemOperand()->getFlags(),
- LD->getMemoryVT().getStoreSize(),
- ABIAlignment);
- LD->refineAlignment(MMO);
- SDValue BaseLoad = SDValue(LD, 0);
+ MachineMemOperand *BaseMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -LD->getMemoryVT().getStoreSize()+1,
+ 2*LD->getMemoryVT().getStoreSize()-1);
+
+ // Create the new base load.
+ SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
+ getPointerTy());
+ SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
+ SDValue BaseLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ BaseLoadOps, MVT::v4i32, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
@@ -8181,21 +8430,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ MachineMemOperand *ExtraMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
- DAG.getLoad(VT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncOffset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), ABIAlignment);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ ExtraLoadOps, MVT::v4i32, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
- if (BaseLoad.getValueType() != MVT::v4i32)
- BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
-
- if (ExtraLoad.getValueType() != MVT::v4i32)
- ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
-
// Because vperm has a big-endian bias, we must reverse the order
// of the input vectors and complement the permute control vector
// when generating little endian code. We have already handled the
@@ -8212,36 +8458,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
- // Now we need to be really careful about how we update the users of the
- // original load. We cannot just call DCI.CombineTo (or
- // DAG.ReplaceAllUsesWith for that matter), because the load still has
- // uses created here (the permutation for example) that need to stay.
- SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- while (UI != UE) {
- SDUse &Use = UI.getUse();
- SDNode *User = *UI;
- // Note: BaseLoad is checked here because it might not be N, but a
- // bitcast of N.
- if (User == Perm.getNode() || User == BaseLoad.getNode() ||
- User == TF.getNode() || Use.getResNo() > 1) {
- ++UI;
- continue;
- }
-
- SDValue To = Use.getResNo() ? TF : Perm;
- ++UI;
-
- SmallVector<SDValue, 8> Ops;
- for (const SDUse &O : User->ops()) {
- if (O == Use)
- Ops.push_back(To);
- else
- Ops.push_back(O);
- }
-
- DAG.UpdateNodeOperands(User, Ops);
- }
-
+ // The output of the permutation is our loaded result, the TokenFactor is
+ // our new chain.
+ DCI.CombineTo(N, Perm, TF);
return SDValue(N, 0);
}
}
@@ -8659,7 +8878,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
@@ -8872,6 +9092,92 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -8931,9 +9237,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
-bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -8948,7 +9255,8 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
- if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+ VT != MVT::v4f32 && VT != MVT::v4i32)
return false;
} else {
return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index df05aa5..bb4d1f1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#include "PPC.h"
#include "PPCInstrInfo.h"
@@ -99,6 +99,10 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
+ /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
+ /// to access TLS variables.
+ CALL_TLS, CALL_NOP_TLS,
+
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -181,6 +185,10 @@ namespace llvm {
/// on PPC32.
PPC32_GOT,
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS on PPC32.
+ PPC32_PICGOT,
+
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
@@ -210,10 +218,6 @@ namespace llvm {
/// sym\@got\@tlsgd\@l.
ADDI_TLSGD_L,
- /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd).
- GET_TLS_ADDR,
-
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
@@ -224,10 +228,6 @@ namespace llvm {
/// sym\@got\@tlsld\@l.
ADDI_TLSLD_L,
- /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld).
- GET_TLSLD_ADDR,
-
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
@@ -297,27 +297,28 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG);
+ unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG);
+ unsigned ShuffleKind, SelectionDAG &DAG);
- /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
- /// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+ /// shift amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -344,7 +345,7 @@ namespace llvm {
const PPCSubtarget &Subtarget;
public:
- explicit PPCTargetLowering(PPCTargetMachine &TM);
+ explicit PPCTargetLowering(const PPCTargetMachine &TM);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
@@ -355,6 +356,11 @@ namespace llvm {
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ /// Return true if target always beneficiates from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -403,6 +409,11 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
@@ -472,6 +483,10 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const override;
+
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -490,9 +505,10 @@ namespace llvm {
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- bool allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -510,6 +526,20 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ return Ty->isArrayTy();
+ }
+
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -533,6 +563,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -666,8 +698,12 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9318f70..9a19abb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
+
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
-// The following three definitions are selected for small code model only.
+// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
@@ -801,8 +804,12 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
"ld 2, $src", IIC_LdStLD,
[(PPCload_toc ixaddr:$src)]>, isPPC64;
@@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsADDR",
- [(set i64:$rD,
- (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsldADDR",
- [(set i64:$rD,
- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
@@ -1135,3 +1132,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)),
def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
+// 64-bits atomic loads and stores
+def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+
+def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index dce46d8..4ef08eb 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,110 +22,143 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
- *CurDAG);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
- *CurDAG);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
- *CurDAG);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
- *CurDAG);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
@@ -242,48 +275,64 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
-let isCodeGenOnly = 1 in {
-def DSS : DSS_Form<822, (outs),
- (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSSALL : DSS_Form<822, (outs),
- (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DST : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTT : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTST : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTSTT : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
+def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
+ Deprecated<DeprecatedDST> {
+ let A = 0;
+ let B = 0;
+}
-def DST64 : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+def DSSALL : DSS_Form<1, 822, (outs), (ins),
+ "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>,
+ Deprecated<DeprecatedDST> {
+ let STRM = 0;
+ let A = 0;
+ let B = 0;
+}
+
+def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTT64 : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTST64 : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTSTT64 : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
+
+let isCodeGenOnly = 1 in {
+ // The very same instructions as above, but formally matching 64bit registers.
+ def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
@@ -731,30 +780,6 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
-// DS* intrinsics
-def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
-def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
-
-// * 32-bit
-def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
- (DST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, $rA, $rB)>;
-
-// * 64-bit
-def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
-
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -789,6 +814,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
(VPKUHUM $vA, $vA)>;
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+ (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUHUM $vB, $vA)>;
+
// Match vmrg*(x,x)
def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
(VMRGLB $vA, $vA)>;
@@ -803,6 +838,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
(VMRGHW $vA, $vA)>;
+// Match vmrg*(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHW $vB, $vA)>;
+
// Logical Operations
def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index b424d11..cf71b1c 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRBUILDER_H
-#define POWERPC_INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 1e4396c..aa68497 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -380,6 +380,11 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
let Inst{31} = RC;
}
+class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> {
+ let RST = 0;
+}
+
// This is the same as XForm_base_r3xo, but the first two operands are swapped
// when code is emitted.
class XForm_base_r3xo_swapped
@@ -417,6 +422,22 @@ class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let B = 0;
}
+class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<1> WS;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{20} = WS;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -457,6 +478,52 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<4> CT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6} = 0;
+ let Inst{7-10} = CT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<4> SR;
+
+ let Inst{6-10} = RS;
+ let Inst{12-15} = SR;
+ let Inst{21-30} = xo;
+}
+
+class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> MO;
+
+ let Inst{6-10} = MO;
+ let Inst{21-30} = xo;
+}
+
+class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<5> RB;
+
+ let Inst{6-10} = RS;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+}
+
class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -764,10 +831,9 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
// DSS_Form - Form X instruction, used for altivec dss* instructions.
-class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
- bits<1> T;
bits<2> STRM;
bits<5> A;
bits<5> B;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9bac91d..daf8790 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -75,7 +75,7 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II =
- &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
@@ -331,6 +331,11 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode));
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(PPC::NOP);
+}
+
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -1617,6 +1622,7 @@ protected:
bool Changed = false;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
MachineInstr *MI = I;
@@ -1682,16 +1688,26 @@ protected:
// In theory, there could be other uses of the addend copy before this
// fma. We could deal with this, but that would require additional
// logic below and I suspect it will not occur in any relevant
- // situations.
- bool OtherUsers = false;
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ bool OtherUsers = false, KillsAddendSrc = false;
for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
- J != JE; --J)
+ J != JE; --J) {
if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
OtherUsers = true;
break;
}
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
- if (OtherUsers)
+ if (OtherUsers || KillsAddendSrc)
continue;
// Find one of the product operands that is killed by this instruction.
@@ -1712,10 +1728,11 @@ protected:
if (!KilledProdOp)
continue;
- // In order to replace the addend here with the source of the copy,
- // it must still be live here.
- if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
- continue;
+ // For virtual registers, verify that the addend source register
+ // is live here (as should have been assured above).
+ assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+ LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+ "Addend source register is not live!");
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
@@ -1737,6 +1754,12 @@ protected:
unsigned OldFMAReg = MI->getOperand(0).getReg();
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
@@ -1827,7 +1850,7 @@ public:
LIS = &getAnalysis<LiveIntervals>();
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -1980,7 +2003,7 @@ public:
// If we don't have VSX on the subtarget, don't do anything.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2057,7 +2080,7 @@ public:
// If we don't have VSX don't bother doing anything here.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2214,7 +2237,7 @@ protected:
public:
bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 83f14c6..4d310fe 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRUCTIONINFO_H
-#define POWERPC_INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#include "PPC.h"
#include "PPCRegisterInfo.h"
@@ -228,6 +228,8 @@ public:
/// instruction may be. This returns the maximum number of bytes.
///
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c2e3382..8c76c46 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
+def tocentry32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
@@ -107,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
-def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
@@ -133,9 +134,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -417,6 +424,15 @@ def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+ let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+ let PrintMethod = "printU4ImmOperand";
+ let ParserMatchClass = PPCU4ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -446,7 +462,7 @@ def u6imm : Operand<i32> {
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
@@ -456,7 +472,7 @@ def s16imm : Operand<i32> {
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addU16ImmOperands";
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
@@ -466,7 +482,7 @@ def u16imm : Operand<i32> {
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s17imm : Operand<i32> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -542,7 +558,7 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
def PPCDispRIOperand : AsmOperandClass {
let Name = "DispRI"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
@@ -554,6 +570,27 @@ def PPCDispRIXOperand : AsmOperandClass {
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
}
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE2Operand;
+}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
@@ -571,6 +608,21 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let EncoderMethod = "getMemRIXEncoding";
let DecoderMethod = "decodeMemRIXOperands";
}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE8DisEncoding";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE4DisEncoding";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE2DisEncoding";
+}
// A single-register address. This is used with the SjLj
// pseudo-instructions.
@@ -585,6 +637,12 @@ def tlsreg32 : Operand<i32> {
let EncoderMethod = "getTLSRegEncoding";
let ParserMatchClass = PPCTLSRegOperand;
}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
@@ -611,6 +669,12 @@ def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
+def IsE500 : Predicate<"PPCSubTarget->isE500()">;
+def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -967,6 +1031,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let Defs = [LR] in
def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
+let Defs = [LR] in
+ def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
@@ -1068,6 +1135,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
"bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+ "bl $func", IIC_BrB, []>;
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
@@ -1243,8 +1312,15 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
- (DCBT xoaddr:$dst)>;
+ (DCBT xoaddr:$dst)>; // data prefetch for loads
+def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+ (DCBTST xoaddr:$dst)>; // data prefetch for stores
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+ (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
// Atomic operations
let usesCustomInserter = 1 in {
@@ -1628,17 +1704,19 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+ "sync $L", IIC_LdStSync, []>;
let isCodeGenOnly = 1 in {
def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
- "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+ "msync", IIC_LdStSync, []> {
let L = 0;
}
}
-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -2355,6 +2433,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2393,13 +2473,47 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
- "#LDgotTprelL32",
- [(set i32:$rD,
- (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
@@ -2434,8 +2548,15 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits
+// versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2454,6 +2575,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
(FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
@@ -2970,6 +3092,16 @@ def : Pat<(i1 (not (trunc i64:$in))),
// PowerPC Instructions used for assembler/disassembler only
//
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def LSWI : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B),
+ "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B),
+ "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
"isync", IIC_SprISYNC, []>;
@@ -2982,9 +3114,47 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", IIC_LdStLoad, []>;
+def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
+ "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR),
+ "mtsr $SR, $RS", IIC_SprMTSR>;
+
+def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR),
+ "mfsr $RS, $SR", IIC_SprMFSR>;
+
+def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
+ "mtsrin $RS, $RB", IIC_SprMTSR>;
+
+def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
+ "mfsrin $RS, $RB", IIC_SprMFSR>;
+
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsr $RS, $L", IIC_SprMTMSR>;
+def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
+ "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> {
+ let L = 0;
+}
+
+def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
+ Requires<[IsBookE]> {
+ bits<1> E;
+
+ let Inst{16} = E;
+ let Inst{21-30} = 163;
+}
+
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
+ "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
+ "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", IIC_SprMFMSR, []>;
@@ -3002,15 +3172,66 @@ def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+def TLBIA : XForm_0<31, 370, (outs), (ins),
+ "tlbia", IIC_SprTLBIA, []>;
+
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
"tlbsync", IIC_SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
"tlbiel $RB", IIC_SprTLBIEL, []>;
+def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
+ "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
+ "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
"tlbie $RB,$RS", IIC_SprTLBIE, []>;
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
+ "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
+ "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
+ "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
+ "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>;
+def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
+ (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>, isDOT;
+
+def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
+
+def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>,
+ Requires<[IsBookE]>;
+def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>,
+ Requires<[IsBookE]>;
+
+def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
+ Requires<[IsE500]>;
+def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
+ Requires<[IsE500]>;
+
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
+ "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -3033,15 +3254,17 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
-def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
def : InstAlias<"waitimpl", (WAIT 2)>;
+def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3050,9 +3273,57 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
+
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
+
+def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
+def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+
def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
@@ -3063,6 +3334,60 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+foreach BATR = 0-3 in {
+ def : InstAlias<"mtdbatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtdbatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>,
+ Requires<[IsPPC6xx]>;
+}
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
+
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+
+def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
+
def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
@@ -3082,25 +3407,25 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
-def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
-
-def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
-def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
-
-def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
@@ -3119,6 +3444,15 @@ def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -3367,3 +3701,18 @@ defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;
+// Atomic loads
+def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
+def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
+def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
+def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
+def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
+def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
+
+// Atomic stores
+def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
new file mode 100644
index 0000000..cc3a4d2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -0,0 +1,447 @@
+//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Signal Processing Engine extension to
+// the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> {
+ let RB = 0;
+}
+
+class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> crD;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-8} = crD;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<21> D;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{20} = D{0};
+ let Inst{19} = D{1};
+ let Inst{18} = D{2};
+ let Inst{17} = D{3};
+ let Inst{16} = D{4};
+ let Inst{15} = D{5};
+ let Inst{14} = D{6};
+ let Inst{13} = D{7};
+ let Inst{12} = D{8};
+ let Inst{11} = D{9};
+ let Inst{11-20} = D{0-9};
+ let Inst{21-31} = xo;
+}
+
+let Predicates = [HasSPE], isAsmParserOnly = 1 in {
+
+def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldd $RT, $dst", IIC_VecFP>;
+def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldw $RT, $dst", IIC_VecFP>;
+def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldh $RT, $dst", IIC_VecFP>;
+def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhesplat $RT, $dst", IIC_VecFP>;
+def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhousplat $RT, $dst", IIC_VecFP>;
+def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhossplat $RT, $dst", IIC_VecFP>;
+def EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhe $RT, $dst", IIC_VecFP>;
+def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhou $RT, $dst", IIC_VecFP>;
+def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhos $RT, $dst", IIC_VecFP>;
+def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwwsplat $RT, $dst", IIC_VecFP>;
+def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhsplat $RT, $dst", IIC_VecFP>;
+
+def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdd $RT, $dst", IIC_VecFP>;
+def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdh $RT, $dst", IIC_VecFP>;
+def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdw $RT, $dst", IIC_VecFP>;
+def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwhe $RT, $dst", IIC_VecFP>;
+def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwho $RT, $dst", IIC_VecFP>;
+def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwe $RT, $dst", IIC_VecFP>;
+def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwo $RT, $dst", IIC_VecFP>;
+
+def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA),
+ "evmra $RT, $RA", IIC_VecFP> {
+ let RB = 0;
+}
+
+def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "brinc $RT, $RA, $RB", IIC_VecFP>;
+def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA),
+ "evabs $RT, $RA", IIC_VecFP>;
+
+def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evaddiw $RT, $RB, $RA", IIC_VecFP>;
+def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddsmiaaw $RT, $RA", IIC_VecFP>;
+def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddssiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddusiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUMIAAW : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddumiaaw $RT, $RA", IIC_VecFP>;
+def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evaddw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evand $RT, $RA, $RB", IIC_VecFP>;
+def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evandc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpeq $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgtu $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmplts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpltu $crD, $RA, $RB", IIC_VecFP>;
+
+def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlsw $RT, $RA", IIC_VecFP>;
+def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlzw $RT, $RA", IIC_VecFP>;
+
+def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivws $RT, $RA, $RB", IIC_VecFP>;
+def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "eveqv $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsb $RT, $RA", IIC_VecFP>;
+def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsh $RT, $RA", IIC_VecFP>;
+
+def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlddx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldwx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDHX : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldhx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhoux $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhosx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehi $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehilo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelohi $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumian $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAA : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousianw $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMI : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFA : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumian $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnand $RT, $RA, $RB", IIC_VecFP>;
+
+def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA),
+ "evneg $RT, $RA", IIC_VecFP>;
+
+def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnor $RT, $RA, $RB", IIC_VecFP>;
+def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evor $RT, $RA, $RB", IIC_VecFP>;
+def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evorc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evrlwi $RT, $RA, $RB", IIC_VecFP>;
+def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evrlw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA),
+ "evrndw $RT, $RA", IIC_VecFP>;
+
+def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evslwi $RT, $RA, $RB", IIC_VecFP>;
+def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evslw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplatfi $RT, $RA", IIC_VecFP>;
+def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplati $RT, $RA", IIC_VecFP>;
+
+def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwis $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwiu $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrws $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstddx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdhx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdwx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhox $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwox $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfssiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfsmiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfumiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfusiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsubfw $RT, $RA, $RB", IIC_VecFP>;
+def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB),
+ "evsubifw $RT, $RA, $RB", IIC_VecFP>;
+def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evxor $RT, $RA, $RB", IIC_VecFP>;
+
+} // HasSPE
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 49bcc48..2c8f998 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,23 +47,24 @@ let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, canFoldAsLoad = 1 in {
- def LXSDX : XForm_1<31, 588,
+ def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
- def LXVD2X : XForm_1<31, 844,
+ def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
"lxvd2x $XT, $src", IIC_LdStLFD,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
+ [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
- def LXVDSX : XForm_1<31, 332,
+ def LXVDSX : XX1Form<31, 332,
(outs vsrc:$XT), (ins memrr:$src),
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
- def LXVW4X : XForm_1<31, 780,
+ def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
- "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
}
// Store indexed instructions
@@ -76,11 +77,12 @@ let Uses = [RM] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
- "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
}
// Add/Mul Instructions
@@ -641,24 +643,36 @@ let Uses = [RM] in {
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
def XSMINDP : XX3Form<60, 168,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
def XVMAXDP : XX3Form<60, 224,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
def XVMINDP : XX3Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
def XVMAXSP : XX3Form<60, 192,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
def XVMINSP : XX3Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
} // Uses = [RM]
@@ -715,6 +729,31 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // neverHasSideEffects
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+
+ def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+} // usesCustomInserter
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -811,6 +850,49 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
(XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+// Loads.
+def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
} // AddedComplexity
} // HasVSX
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
deleted file mode 100644
index e5f113a..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the 32-bit PowerPC target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCJITInfo.h"
-#include "PPCRelocations.h"
-#include "PPCSubtarget.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
- : Subtarget(STI), is64Bit(STI.isPPC64()) {
- useGOT = 0;
-}
-
-#define BUILD_ADDIS(RD,RS,IMM16) \
- ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_ORI(RD,RS,UIMM16) \
- ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_ORIS(RD,RS,UIMM16) \
- ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_RLDICR(RD,RS,SH,ME) \
- ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
- (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
-#define BUILD_MTSPR(RS,SPR) \
- ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
-#define BUILD_BCCTRx(BO,BI,LINK) \
- ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
-#define BUILD_B(TARGET, LINK) \
- ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
-
-// Pseudo-ops
-#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
-#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
-#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
-#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
-
-static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
- intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
- unsigned *AtI = (unsigned*)(intptr_t)At;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- AtI[0] = BUILD_B(Offset, isCall); // b/bl target
- } else if (!is64Bit) {
- AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_MTCTR(12); // mtctr r12
- AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
- } else {
- AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
- AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
- AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[5] = BUILD_MTCTR(12); // mtctr r12
- AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
- }
-}
-
-extern "C" void PPC32CompilationCallback();
-extern "C" void PPC64CompilationCallback();
-
-// The first clause of the preprocessor directive looks wrong, but it is
-// necessary when compiling this code on non-PowerPC hosts.
-#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
-void PPC32CompilationCallback() {
- llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
-}
-#elif !defined(__ELF__)
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC32CompilationCallback\n"
-"_PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // PowerPC32 ABI linkage - 24 bytes
- // parameters - 32 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 32 bytes
- "mflr r0\n"
- "stw r0, 8(r1)\n"
- "stwu r1, -208(r1)\n"
- // Save all int arg registers
- "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
- "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
- "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
- "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
- // Save all call-clobbered FP regs.
- "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
- "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
- "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
- "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
- "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
- "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
- "stfd f1, 72(r1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr r3, r0\n"
- "lwz r2, 208(r1)\n" // stub's frame
- "lwz r4, 8(r2)\n" // stub's lr
- "li r5, 0\n" // 0 == 32 bit
- "bl _LLVMPPCCompilationCallback\n"
- "mtctr r3\n"
- // Restore all int arg registers
- "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
- "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
- "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
- "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
- // Restore all FP arg registers
- "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
- "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
- "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
- "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
- "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
- "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
- "lfd f1, 72(r1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz r1, 208(r1)\n"
- "lwz r2, 8(r1)\n"
- "mtlr r2\n"
- "bctr\n"
- );
-
-#else
-// ELF PPC 32 support
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC32CompilationCallback\n"
-"PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // 8 double registers - 64 bytes
- // 8 int registers - 32 bytes
- "mflr 0\n"
- "stw 0, 4(1)\n"
- "stwu 1, -104(1)\n"
- // Save all int arg registers
- "stw 10, 100(1)\n" "stw 9, 96(1)\n"
- "stw 8, 92(1)\n" "stw 7, 88(1)\n"
- "stw 6, 84(1)\n" "stw 5, 80(1)\n"
- "stw 4, 76(1)\n" "stw 3, 72(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 8, 64(1)\n"
- "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
- "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
- "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
- "stfd 1, 8(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr 3, 0\n"
- "lwz 5, 104(1)\n" // stub's frame
- "lwz 4, 4(5)\n" // stub's lr
- "li 5, 0\n" // 0 == 32 bit
- "bl LLVMPPCCompilationCallback\n"
- "mtctr 3\n"
- // Restore all int arg registers
- "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
- "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
- "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
- "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
- // Restore all FP arg registers
- "lfd 8, 64(1)\n"
- "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
- "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
- "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
- "lfd 1, 8(1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz 1, 104(1)\n"
- "lwz 0, 4(1)\n"
- "mtlr 0\n"
- "bctr\n"
- );
-#endif
-
-#if !defined(__powerpc64__) && !defined(__ppc64__)
-void PPC64CompilationCallback() {
- llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
-}
-#else
-# ifdef __ELF__
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC64CompilationCallback\n"
-#if _CALL_ELF == 2
- ".type PPC64CompilationCallback,@function\n"
-"PPC64CompilationCallback:\n"
-#else
- ".section \".opd\",\"aw\",@progbits\n"
- ".align 3\n"
-"PPC64CompilationCallback:\n"
- ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
- ".size PPC64CompilationCallback,24\n"
- ".previous\n"
- ".align 4\n"
- ".type PPC64CompilationCallback,@function\n"
-".L.PPC64CompilationCallback:\n"
-#endif
-# else
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC64CompilationCallback\n"
-"_PPC64CompilationCallback:\n"
-# endif
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
- // FIXME: need to save v[0-19] for altivec?
- // Set up a proper stack frame
- // Layout
- // PowerPC64 ABI linkage - 48 bytes
- // parameters - 64 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 64 bytes
- "mflr 0\n"
- "std 0, 16(1)\n"
- "stdu 1, -280(1)\n"
- // Save all int arg registers
- "std 10, 272(1)\n" "std 9, 264(1)\n"
- "std 8, 256(1)\n" "std 7, 248(1)\n"
- "std 6, 240(1)\n" "std 5, 232(1)\n"
- "std 4, 224(1)\n" "std 3, 216(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
- "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
- "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
- "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
- "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
- "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
- "stfd 1, 112(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 1.
- "mr 3, 0\n" // return address (still in r0)
- "ld 5, 280(1)\n" // stub's frame
- "ld 4, 16(5)\n" // stub's lr
- "li 5, 1\n" // 1 == 64 bit
-# ifdef __ELF__
- "bl LLVMPPCCompilationCallback\n"
- "nop\n"
-# else
- "bl _LLVMPPCCompilationCallback\n"
-# endif
- "mtctr 3\n"
- // Restore all int arg registers
- "ld 10, 272(1)\n" "ld 9, 264(1)\n"
- "ld 8, 256(1)\n" "ld 7, 248(1)\n"
- "ld 6, 240(1)\n" "ld 5, 232(1)\n"
- "ld 4, 224(1)\n" "ld 3, 216(1)\n"
- // Restore all FP arg registers
- "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
- "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
- "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
- "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
- "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
- "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
- "lfd 1, 112(1)\n"
- // Pop 3 frames off the stack and branch to target
- "ld 1, 280(1)\n"
- "ld 0, 16(1)\n"
- "mtlr 0\n"
- // XXX: any special TOC handling in the ELF case for JIT?
- "bctr\n"
- );
-#endif
-
-extern "C" {
-LLVM_LIBRARY_VISIBILITY void *
-LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
- // Adjust the pointer to the address of the call instruction in the stub
- // emitted by emitFunctionStub, rather than the instruction after it.
- unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
- unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
-
- void *Target = JITCompilerFunction(StubCallAddr);
-
- // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
- // it to branch directly to the destination. If so, rewrite it so it does not
- // need to go through the stub anymore.
- unsigned OrigCallInst = *OrigCallAddr;
- if ((OrigCallInst >> 26) == 18) { // Direct call.
- intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- // Clear the original target out.
- OrigCallInst &= (63 << 26) | 3;
- // Fill in the new target.
- OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
- // Replace the call.
- *OrigCallAddr = OrigCallInst;
- }
- }
-
- // Assert that we are coming from a stub that was created with our
- // emitFunctionStub.
- if ((*StubCallAddr >> 26) == 18)
- StubCallAddr -= 3;
- else {
- assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
- StubCallAddr -= is64Bit ? 9 : 6;
- }
-
- // Rewrite the stub with an unconditional branch to the target, for any users
- // who took the address of the stub.
- EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
-
- // Put the address of the target function to call and the address to return to
- // after calling the target function in a place that is easy to get on the
- // stack after we restore all regs.
- return Target;
-}
-}
-
-
-
-TargetJITInfo::LazyResolverFn
-PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
- JITCompilerFunction = Fn;
- return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
-}
-
-TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
- // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
- // instructions to save the caller's address if this is a lazy-compilation
- // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
- // into a register and jump through it.
- StubLayout Result = {10*4, 4};
- return Result;
-}
-
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
-defined(__APPLE__)
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-#endif
-
-void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- // If this is just a call to an external function, emit a branch instead of a
- // call. The code is the same except for one bit of the last instruction.
- if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
- Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- void *Addr = (void*)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 7*4);
- return Addr;
- }
-
- void *Addr = (void*)JCE.getCurrentPCValue();
- if (is64Bit) {
- JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (Subtarget.isDarwinABI()){
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
- } else {
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
- }
- intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 10*4);
- return Addr;
-}
-
-
-void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
- intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
- switch ((PPC::RelocationType)MR->getRelocationType()) {
- default: llvm_unreachable("Unknown relocation type!");
- case PPC::reloc_pcrel_bx:
- // PC-relative relocation for b and bl instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
- break;
- case PPC::reloc_pcrel_bcx:
- // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
- // bcx instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
- break;
- case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
- case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
- ResultPtr += MR->getConstantVal();
-
- // If this is a high-part access, get the high-part.
- if (MR->getRelocationType() == PPC::reloc_absolute_high) {
- // If the low part will have a carry (really a borrow) from the low
- // 16-bits into the high 16, add a bit to borrow from.
- if (((int)ResultPtr << 16) < 0)
- ResultPtr += 1 << 16;
- ResultPtr >>= 16;
- }
-
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
- unsigned HighBits = *RelocPos & ~65535;
- *RelocPos = LowBits | HighBits; // Slam into low 16-bits
- break;
- }
- case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
- ResultPtr += MR->getConstantVal();
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
- unsigned HighBits = *RelocPos & 0xFFFF0003;
- *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
- break;
- }
- }
- }
-}
-
-void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Old, 7*4);
-}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
deleted file mode 100644
index b6b37ff..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PowerPC implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef POWERPC_JITINFO_H
-#define POWERPC_JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class PPCSubtarget;
-class PPCJITInfo : public TargetJITInfo {
-protected:
- PPCSubtarget &Subtarget;
- bool is64Bit;
-
-public:
- PPCJITInfo(PPCSubtarget &STI);
-
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
- unsigned char *GOTBase) override;
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-};
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f8e84a5..880b520 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
@@ -37,14 +38,16 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCContext &Ctx = AP.OutContext;
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
SmallString<128> Name;
StringRef Suffix;
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB)
- Suffix = "$stub";
- else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+ if (isDarwin)
+ Suffix = "$stub";
+ } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
Suffix = "$non_lazy_ptr";
if (!Suffix.empty())
@@ -68,7 +71,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
// If the target flags on the operand changes the name of the symbol, do that
// before we return the symbol.
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI(AP).getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -134,8 +137,17 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
+ case PPCII::MO_TLSGD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case PPCII::MO_TLSLD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
}
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+ RefKind = MCSymbolRefExpr::VK_PLT;
+
const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 6a0aec8..4aff95a 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,8 +8,17 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
void PPCFunctionInfo::anchor() { }
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+ const DataLayout *DL = MF.getSubtarget().getDataLayout();
+ return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+ Twine(MF.getFunctionNumber())+"$poff");
+}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 33f843d..83de799 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_MACHINE_FUNCTION_INFO_H
-#define PPC_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
@@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// 64-bit SVR4 ABI.
SmallVector<unsigned, 3> MustSaveCRs;
+ /// Hold onto our MachineFunction context.
+ MachineFunction &MF;
+
+ /// Whether this uses the PIC Base register or not.
+ bool UsesPICBase;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -109,7 +115,9 @@ public:
VarArgsStackOffset(0),
VarArgsNumGPR(0),
VarArgsNumFPR(0),
- CRSpillFrameIndex(0) {}
+ CRSpillFrameIndex(0),
+ MF(MF),
+ UsesPICBase(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -170,6 +178,11 @@ public:
const SmallVectorImpl<unsigned> &
getMustSaveCRs() const { return MustSaveCRs; }
void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+ void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+ bool usesPICBase() const { return UsesPICBase; }
+
+ MCSymbol *getPICOffsetSymbol() const;
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 17b836d..8a1d680 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 292 entries have cost 1
// 1384 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index eca774e..9b9966f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -140,8 +140,8 @@ PPCRegisterInfo::getNoPreservedMask() const {
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (PPCFI->needsFP(MF))
Reserved.set(PPC::R31);
- if (hasBasePointer(MF))
+ if (hasBasePointer(MF)) {
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R29);
+ else
+ Reserved.set(PPC::R30);
+ }
+
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -214,7 +223,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned
PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -278,7 +287,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Determine whether 64-bit pointers are used.
bool LP64 = Subtarget.isPPC64();
DebugLoc dl = MI.getDebugLoc();
@@ -289,7 +298,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -394,7 +406,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -438,7 +450,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -511,7 +523,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -554,7 +566,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -601,7 +613,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -626,7 +638,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -706,7 +718,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -831,7 +843,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!Subtarget.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -843,7 +855,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- return Subtarget.isPPC64() ? PPC::X30 : PPC::R30;
+ if (Subtarget.isPPC64())
+ return PPC::X30;
+
+ if (Subtarget.isSVR4ABI() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ return PPC::R29;
+
+ return PPC::R30;
}
bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
@@ -868,7 +887,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -885,16 +907,6 @@ bool PPCRegisterInfo::
needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
assert(Offset < 0 && "Local offset must be negative");
- unsigned FIOperandNum = 0;
- while (!MI->getOperand(FIOperandNum).isFI()) {
- ++FIOperandNum;
- assert(FIOperandNum < MI->getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
- Offset += MI->getOperand(OffsetOperandNo).getImm();
-
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
// based on the size of the local frame and some conservative assumptions
@@ -916,8 +928,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
unsigned StackEst =
PPCFI->determineFrameLayout(MF, false, true);
@@ -951,7 +963,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -976,7 +988,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.constrainRegClass(BaseReg,
@@ -985,6 +997,16 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 13a35f6..c182f95 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC32_REGISTERINFO_H
-#define POWERPC32_REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
#include "PPC.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
deleted file mode 100644
index 0b392f9..0000000
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCRELOCATIONS_H
-#define PPCRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-// Hack to rid us of a PPC pre-processor symbol which is erroneously
-// defined in a PowerPC header file (bug in Linux/PPC)
-#ifdef PPC
-#undef PPC
-#endif
-
-namespace llvm {
- namespace PPC {
- enum RelocationType {
- // reloc_vanilla - A standard relocation, where the address of the
- // relocated object completely overwrites the address of the relocation.
- reloc_vanilla,
-
- // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
- reloc_pcrel_bx,
-
- // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
- // and other bcx instructions.
- reloc_pcrel_bcx,
-
- // reloc_absolute_high - Absolute relocation, for the loadhi instruction
- // (which is really addis). Add the high 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_high,
-
- // reloc_absolute_low - Absolute relocation, for the la instruction (which
- // is really an addi). Add the low 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_low,
-
- // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
- // instruction which have two implicit zero bits.
- reloc_absolute_low_ix
- };
- }
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 1221d41..7f80121 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -106,6 +106,7 @@ def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
def IIC_SprSLBMFEE : InstrItinClass;
def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIA : InstrItinClass;
def IIC_SprTLBIEL : InstrItinClass;
def IIC_SprTLBIE : InstrItinClass;
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index b2e7f3b..2c1378d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCCSELECTIONDAGINFO_H
-#define POWERPCCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 2e1b74a..04e7ec6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -33,13 +33,12 @@ using namespace llvm;
#include "PPCGenSubtargetInfo.inc"
/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
std::string Ret;
// Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
+ if (T.getArch() == Triple::ppc64le)
Ret = "e";
else
Ret = "E";
@@ -48,18 +47,18 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
// PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
// pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ if (!is64Bit || T.getOS() == Triple::Lv2)
Ret += "-p:32:32";
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
+ if (is64Bit || !T.isOSDarwin())
Ret += "-i64:64";
else
Ret += "-f64:32:64";
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
+ if (is64Bit)
Ret += "-n32:64";
else
Ret += "-n32";
@@ -70,47 +69,20 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM,
- bool is64Bit, CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel),
- FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ const std::string &FS, const PPCTargetMachine &TM)
+ : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ DL(getDataLayoutString(TargetTriple)),
+ IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le),
+ TargetABI(PPC_ABI_UNKNOWN),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
TLInfo(TM), TSInfo(&DL) {}
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void PPCSubtarget::SetJITMode() {
- // JIT mode doesn't want lazy resolver stubs, it knows exactly where
- // everything is. This matters for PPC64, which codegens in PIC mode without
- // stubs.
- HasLazyResolverStubs = false;
-
- // Calls to external functions need to use indirect calls
- IsJITCodeModel = true;
-}
-
-void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
DarwinDirective = PPC::DIR_NONE;
@@ -119,8 +91,10 @@ void PPCSubtarget::initializeEnvironment() {
Use64BitRegs = false;
UseCRBits = false;
HasAltivec = false;
+ HasSPE = false;
HasQPX = false;
HasVSX = false;
+ HasP8Vector = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -136,13 +110,16 @@ void PPCSubtarget::initializeEnvironment() {
HasPOPCNTD = false;
HasLDBRX = false;
IsBookE = false;
+ HasOnlyMSYNC = false;
+ IsPPC4xx = false;
+ IsPPC6xx = false;
+ IsE500 = false;
DeprecatedMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
- IsJITCodeModel = false;
}
-void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
@@ -156,35 +133,13 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- // Make sure 64-bit features are available when CPUname is generic
- std::string FullFS = FS;
-
- // If we are generating code for ppc64, verify that options make sense.
- if (IsPPC64) {
- Has64BitSupport = true;
- // Silently force 64-bit register use on ppc64.
- Use64BitRegs = true;
- if (!FullFS.empty())
- FullFS = "+64bit," + FullFS;
- else
- FullFS = "+64bit";
- }
-
- // At -O2 and above, track CR bits as individual registers.
- if (OptLevel >= CodeGenOpt::Default) {
- if (!FullFS.empty())
- FullFS = "+crbits," + FullFS;
- else
- FullFS = "+crbits";
- }
-
// Parse features string.
- ParseSubtargetFeatures(CPUName, FullFS);
+ ParseSubtargetFeatures(CPUName, FS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
- if (use64BitRegs() && !has64BitSupport())
- Use64BitRegs = false;
+ if (IsPPC64 && has64BitSupport())
+ Use64BitRegs = true;
// Set up darwin-specific properties.
if (isDarwin())
@@ -201,8 +156,20 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// FIXME: For now, we disable VSX in little-endian mode until endian
// issues in those instructions can be addressed.
- if (IsLittleEndian)
+ if (IsLittleEndian) {
HasVSX = false;
+ HasP8Vector = false;
+ }
+
+ // Determine default ABI.
+ if (TargetABI == PPC_ABI_UNKNOWN) {
+ if (!isDarwin() && IsPPC64) {
+ if (IsLittleEndian)
+ TargetABI = PPC_ABI_ELFv2;
+ else
+ TargetABI = PPC_ABI_ELFv1;
+ }
+ }
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -213,31 +180,13 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->isDeclaration();
if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
return false;
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
-bool PPCSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
- CriticalPathRCs.clear();
-
- if (isPPC64())
- CriticalPathRCs.push_back(&PPC::G8RCRegClass);
- else
- CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-
- return OptLevel >= CodeGenOpt::Default;
-}
-
// Embedded cores need aggressive scheduling (and some others also benefit).
static bool needsAggressiveScheduling(unsigned Directive) {
switch (Directive) {
@@ -259,6 +208,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
return needsAggressiveScheduling(DarwinDirective);
}
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+ return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(isPPC64() ?
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 2a16699..1df19c3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCSUBTARGET_H
-#define POWERPCSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
-#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -66,6 +65,12 @@ class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ // Calculates type size & alignment
+ const DataLayout DL;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -83,8 +88,10 @@ protected:
bool UseCRBits;
bool IsPPC64;
bool HasAltivec;
+ bool HasSPE;
bool HasQPX;
bool HasVSX;
+ bool HasP8Vector;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -97,22 +104,23 @@ protected:
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
+ bool HasOnlyMSYNC;
+ bool IsE500;
+ bool IsPPC4xx;
+ bool IsPPC6xx;
bool DeprecatedMFTB;
bool DeprecatedDST;
bool HasLazyResolverStubs;
- bool IsJITCodeModel;
bool IsLittleEndian;
- /// TargetTriple - What processor and OS we're targeting.
- Triple TargetTriple;
-
- /// OptLevel - What default optimization level we're emitting code for.
- CodeGenOpt::Level OptLevel;
+ enum {
+ PPC_ABI_UNKNOWN,
+ PPC_ABI_ELFv1,
+ PPC_ABI_ELFv2
+ } TargetABI;
PPCFrameLowering FrameLowering;
- const DataLayout DL;
PPCInstrInfo InstrInfo;
- PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
@@ -121,17 +129,12 @@ public:
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
- CodeGenOpt::Level OptLevel);
+ const std::string &FS, const PPCTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
@@ -143,24 +146,32 @@ public:
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-
- const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const DataLayout *getDataLayout() const { return &DL; }
- const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- PPCJITInfo *getJITInfo() { return &JITInfo; }
- const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
- const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ const PPCFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the PowerPC target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
@@ -186,9 +197,6 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
- // isJITCodeModel - True if we're generating code for the JIT
- bool isJITCodeModel() const { return IsJITCodeModel; }
-
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -205,13 +213,19 @@ public:
bool hasFPRND() const { return HasFPRND; }
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasSPE() const { return HasSPE; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
+ bool hasP8Vector() const { return HasP8Vector; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
+ bool isPPC4xx() const { return IsPPC4xx; }
+ bool isPPC6xx() const { return IsPPC6xx; }
+ bool isE500() const { return IsE500; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -222,18 +236,22 @@ public:
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
-
- /// enablePostRAScheduler - True at 'More' optimization.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const override;
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
bool enableEarlyIfConversion() const override { return hasISEL(); }
// Scheduling customization.
bool enableMachineScheduler() const override;
+ // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostMachineScheduler() const override;
+ AntiDepBreakMode getAntiDepBreakMode() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9563b90..f15189c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
#include "PPC.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -37,15 +39,54 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
+ std::string FullFS = FS;
+ Triple TargetTriple(TT);
+
+ // Make sure 64-bit features are available when CPUname is generic
+ if (TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le) {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ if (OL >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+ return FullFS;
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return make_unique<TargetLoweringObjectFileMachO>();
+
+ return make_unique<PPC64LinuxTargetObjectFile>();
+}
+
+// The FeatureString here is a little subtle. We are modifying the feature string
+// with what are (currently) non-function specific overrides as it goes into the
+// LLVMTargetMachine constructor and then using the stored value in the
+// Subtarget constructor below it.
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
+ CM, OL),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, TargetFS, *this) {
initAsmInfo();
}
+PPCTargetMachine::~PPCTargetMachine() {}
+
void PPC32TargetMachine::anchor() { }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
@@ -53,7 +94,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
void PPC64TargetMachine::anchor() { }
@@ -63,9 +104,34 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
+const PPCSubtarget *
+PPCTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
@@ -86,6 +152,7 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
@@ -99,6 +166,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
+void PPCPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ TargetPassConfig::addIRPasses();
+}
+
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -148,18 +220,6 @@ bool PPCPassConfig::addPreEmitPass() {
return false;
}
-bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
- // writing?
- Subtarget.SetJITMode();
-
- // Machine code emitter pass for PowerPC.
- PM.add(createPPCJITCodeEmitterPass(*this, JCE));
-
- return false;
-}
-
void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our PPC pass. This
// allows the PPC pass to delegate to the target independent layer when
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 4c7029c..5095d73 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_TARGETMACHINE_H
-#define PPC_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
@@ -24,46 +24,30 @@ namespace llvm {
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
class PPCTargetMachine : public LLVMTargetMachine {
- PPCSubtarget Subtarget;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ PPCSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit);
+ CodeGenOpt::Level OL);
- const PPCInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const PPCFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
- const PPCTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
+ ~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) override;
/// \brief Register PPC analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 3e71bbc..cd84da2 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 74b5f45..6493713 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETSTREAMER_H
-#define PPCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
@@ -19,6 +19,8 @@ public:
virtual ~PPCTargetStreamer();
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
+ virtual void emitAbiVersion(int AbiVersion) = 0;
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
};
}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 007901b..37624ed 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -38,6 +38,7 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
+ const TargetMachine *TM;
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
@@ -47,16 +48,16 @@ public:
}
PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
pushTTIStack(this);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -64,7 +65,7 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -79,33 +80,31 @@ public:
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
- virtual PopcntSupportKind
- getPopcntSupport(unsigned TyWidth) const override;
- virtual void getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const override;
+ PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+ void getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
- virtual unsigned getNumberOfRegisters(bool Vector) const override;
- virtual unsigned getRegisterBitWidth(bool Vector) const override;
- virtual unsigned getMaximumUnrollFactor() const override;
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind,
- OperandValueKind) const override;
- virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const override;
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaxInterleaveFactor() const override;
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const override;
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const override;
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const override;
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const override;
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const override;
/// @}
};
@@ -271,8 +270,9 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTI::getIntImmCost(Imm, Ty);
}
-void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
- if (ST->getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
@@ -297,7 +297,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTI::getMaximumUnrollFactor() const {
+unsigned PPCTTI::getMaxInterleaveFactor() const {
unsigned Directive = ST->getDarwinDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -318,14 +318,15 @@ unsigned PPCTTI::getMaximumUnrollFactor() const {
return 2;
}
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned PPCTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
}
unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,