aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp103
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp46
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp32
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h2
-rw-r--r--lib/Target/PowerPC/PPC.td10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp24
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp38
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp168
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h198
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp11
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h4
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp14
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1002
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h34
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td16
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td62
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td14
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp58
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td3
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp9
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h43
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp58
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h29
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp51
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h35
29 files changed, 1100 insertions, 988 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 3ac037d..2f562ca 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -238,7 +238,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseExpression(const MCExpr *&EVal);
bool ParseDarwinExpression(const MCExpr *&EVal);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
@@ -246,12 +246,11 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDarwinDirectiveMachine(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
- void ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
/// @name Auto-generated Match Functions
/// {
@@ -276,13 +275,12 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
const MCExpr *applyModifierToExpr(const MCExpr *E,
@@ -548,8 +546,9 @@ public:
void print(raw_ostream &OS) const override;
- static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Token);
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -558,22 +557,27 @@ public:
return Op;
}
- static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S,
- bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
// Allocate extra memory for the string and copy it.
+ // FIXME: This is incorrect. Operands are owned by a unique_ptr with a default
+ // deleter, which destroys them with a plain "delete" expression instead of
+ // explicitly calling the dtor and then ::operator delete on this
+ // over-allocated memory.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
- PPCOperand *Op = new (Mem) PPCOperand(Token);
- Op->Tok.Data = (const char *)(Op + 1);
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = (const char *)(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((char *)(Op + 1), Str.data(), Str.size());
+ std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
return Op;
}
- static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Immediate);
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -581,9 +585,9 @@ public:
return Op;
}
- static PPCOperand *CreateExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Expression);
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -592,9 +596,9 @@ public:
return Op;
}
- static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(TLSRegister);
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -602,8 +606,8 @@ public:
return Op;
}
- static PPCOperand *CreateFromMCExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -634,10 +638,8 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
-
-void PPCAsmParser::
-ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
case PPC::LAx: {
@@ -917,11 +919,10 @@ ProcessInstruction(MCInst &Inst,
}
}
-bool PPCAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
@@ -942,7 +943,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -1216,12 +1217,10 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
-bool PPCAsmParser::
-ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
- PPCOperand *Op;
// Attempt to parse the next token as an immediate
switch (getLexer().getKind()) {
@@ -1233,8 +1232,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
return Error(S, "invalid register name");
@@ -1249,8 +1247,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
}
@@ -1272,8 +1269,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Push the parsed operand into the list of operands
- Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
// Check whether this is a TLS call expression
bool TLSCall = false;
@@ -1292,8 +1288,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
}
// Otherwise, check for D-form memory operands
@@ -1340,17 +1335,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
}
return false;
}
/// Parse an instruction mnemonic followed by its operands.
-bool PPCAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
// The first operand is the token for the instruction name.
// If the next character is a '+' or '-', we need to add it to the
// instruction name, to match what TableGen is doing.
@@ -1554,7 +1547,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
@@ -1568,8 +1561,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
default: return Match_InvalidOperand;
}
- PPCOperand *Op = static_cast<PPCOperand*>(AsmOp);
- if (Op->isImm() && Op->getImm() == ImmVal)
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index a4983ad..435a93f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,17 +102,45 @@ public:
// Output the constant in big/little endian byte order.
unsigned Size = Desc.getSize();
- if (IsLittleEndian) {
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)Bits;
- Bits >>= 8;
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
- } else {
- int ShiftValue = (Size * 8) - 8;
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)(Bits >> ShiftValue);
- Bits <<= 8;
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
+ break;
+ default:
+ llvm_unreachable ("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 10d068d..3ac0aca 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
@@ -127,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return true;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 3421b91..bca4085 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -79,7 +79,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index bd58539..a9842b2 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", ""
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model,
FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
+ [DirectivePwr8, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index e89fb2d..fd044d9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,8 +365,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to ADDIS8. If the global address is external,
- // has common linkage, is a function address, or is a jump table
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
// address, then generate a TOC entry and reference that. Otherwise
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
@@ -375,7 +375,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
@@ -384,15 +384,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
IsCommon = GV->hasCommonLinkage();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
- TM.getCodeModel() == CodeModel::Large)
+ if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
+ MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -425,7 +426,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
MOSymbol = getSymbol(GValue);
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ if (GValue->getType()->getElementType()->isFunctionTy() ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -450,17 +452,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
+ if (IsNonLocalFunction || IsExternal ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index ed3cb4d..92a0ec1 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1030,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
@@ -1197,6 +1201,11 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ CCInfo.AllocateStack(LinkageSize, 8);
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
@@ -1218,6 +1227,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it's varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
@@ -1858,16 +1874,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias, use the aliasee for determining thread-locality.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
- }
// FIXME: We don't yet handle the complexity of TLS.
- bool IsTLS = GVar && GVar->isThreadLocal();
- if (IsTLS)
+ if (GV->isThreadLocal())
return 0;
// For small code model, generate a simple TOC load.
@@ -1877,8 +1886,8 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
.addGlobalAddress(GV)
.addReg(PPC::X2);
else {
- // If the address is an externally defined symbol, a symbol with
- // common or externally available linkage, a function address, or a
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%X2, GV))
@@ -1889,12 +1898,13 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // !GVar implies a function address. An external variable is one
- // without an initializer.
// If/when switches are implemented, jump tables should be handled
// on the "if" path here.
- if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
- GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ if (CModel == CodeModel::Large ||
+ (GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index e294156..65e9cf2 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ Subtarget(STI) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
+
/// RemoveVRSaveCode - We have found that this function does not need any code
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
@@ -236,9 +398,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
- // Maximum call frame needs to be at least big enough for linkage and 8 args.
- unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+ // Maximum call frame needs to be at least big enough for the linkage area.
+ unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 94e9b67..7a226f7 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -14,23 +14,18 @@
#define POWERPC_FRAMEINFO_H
#include "PPC.h"
-#include "PPCSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class PPCSubtarget;
+class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
public:
- PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
- Subtarget(sti) {
- }
+ PPCFrameLowering(const PPCSubtarget &STI);
unsigned determineFrameLayout(MachineFunction &MF,
bool UpdateMF = true,
@@ -79,6 +74,12 @@ public:
return isPPC64 ? 16 : 4;
}
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ static unsigned getTOCSaveOffset(void) {
+ return 40;
+ }
+
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
@@ -114,190 +115,9 @@ public:
return 8;
}
- /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
- /// argument area.
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI / 64-bit SVR4 ABI:
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI || isPPC64)
- return 8 * (isPPC64 ? 8 : 4);
-
- // 32-bit SVR4 ABI:
- // There is no default stack allocated for the 8 first GPR arguments.
- return 0;
- }
-
- /// getMinCallFrameSize - Return the minimum size a call frame can be using
- /// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
- // The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(isPPC64, isDarwinABI) +
- getMinCallArgumentsSize(isPPC64, isDarwinABI);
- }
-
- // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
-
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
-
- // Note that the offsets here overlap, but this is fixed up in
- // processFunctionBeforeFrameFinalized.
-
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
-
- // CR save area offset. We map each of the nonvolatile CR fields
- // to the slot for CR2, which is the first of the nonvolatile CR
- // fields to be assigned, so that we only allocate one save slot.
- // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
- {PPC::CR2, -4},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
-
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
-
- return Offsets;
- }
- }
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-
} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 7ca706b..d9b242c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -162,7 +162,8 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
unsigned Directive =
DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If we're using a special group-terminating nop, then we need only one.
- if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 )
return 1;
return 5 - CurSlots;
@@ -223,7 +224,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
- CurSlots == 6) {
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
@@ -258,8 +259,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
//
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
- : TM(TM) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
EndDispatchGroup();
}
@@ -278,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
isLoad = MCID.mayLoad();
isStore = MCID.mayStore();
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index cf4332c..23f76c1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -54,7 +54,7 @@ public:
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
- const TargetMachine &TM;
+ const ScheduleDAG &DAG;
unsigned NumIssued; // Number of insts issued, including advanced cycles.
@@ -75,7 +75,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
unsigned NumStores;
public:
- PPCHazardRecognizer970(const TargetMachine &TM);
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
virtual void EmitInstruction(SUnit *SU) override;
virtual void AdvanceCycle() override;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 251e8b6..4881b3f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1454,10 +1454,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
- // The first source operand is a TargetGlobalAddress or a
- // TargetJumpTable. If it is an externally defined symbol, a symbol
- // with common linkage, a function address, or a jump table address,
- // or if we are generating code for large code model, we generate:
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it is an externally defined symbol, a symbol with common linkage,
+ // a non-local function address, or a jump table address, or if we are
+ // generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1472,8 +1472,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ if ((GValue->getType()->getElementType()->isFunctionTy() &&
+ (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index cf4c9e6..bc057bf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -50,20 +51,18 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
- if (TM.getSubtargetImpl()->isDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
return new TargetLoweringObjectFileMachO();
- if (TM.getSubtargetImpl()->isSVR4ABI())
- return new PPC64LinuxTargetObjectFile();
-
- return new TargetLoweringObjectFileELF();
+ return new PPC64LinuxTargetObjectFile();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -72,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- bool isPPC64 = Subtarget->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
@@ -98,10 +97,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (isPPC64 || Subtarget->hasFPCVT()) {
+ if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
@@ -176,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- if (Subtarget->hasFCPSGN()) {
+ if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
@@ -194,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
- if (Subtarget->hasFPRND()) {
+ if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -216,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- if (Subtarget->hasPOPCNTD()) {
+ if (Subtarget.hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
@@ -228,7 +227,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- if (!Subtarget->useCRBits()) {
+ if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
@@ -241,11 +240,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -297,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget->isSVR4ABI()) {
+ if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
@@ -317,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget->isSVR4ABI() && !isPPC64)
+ if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
@@ -350,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -360,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
@@ -368,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// With the instructions enabled under FPCVT, we can do everything.
- if (PPCSubTarget.hasFPCVT()) {
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
@@ -382,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
- if (Subtarget->use64BitRegs()) {
+ if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -398,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (Subtarget->hasAltivec()) {
+ if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -488,7 +487,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
- Subtarget->useCRBits() ? Legal : Expand);
+ Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -507,7 +506,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -535,7 +534,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
- if (Subtarget->hasVSX()) {
+ if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
@@ -613,7 +612,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
@@ -642,7 +641,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -651,7 +650,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -664,7 +663,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget->isDarwin()) {
+ if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -679,21 +678,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setHasMultipleConditionRegisters();
setMinFunctionAlignment(2);
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget->isJITCodeModel())
+ if (isPPC64 && Subtarget.isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
- if (Subtarget->enableMachineScheduler())
+ if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
@@ -702,8 +701,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// The Freescale cores does better with aggressive inlining of memcpy and
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
@@ -747,14 +746,14 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
- if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
- getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
@@ -774,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
@@ -826,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -855,15 +853,17 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
@@ -871,18 +871,27 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j, k;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ j = 0;
+ k = 1;
+ } else {
+ j = 2;
+ k = 3;
+ }
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+k))
return false;
}
return true;
@@ -909,27 +918,39 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ }
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
@@ -946,18 +967,38 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
- ShiftAmt -= i;
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
- return -1;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+
+ ShiftAmt += i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
+ return -1;
+ }
+
+ } else { // Big Endian
+
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
}
return ShiftAmt;
}
@@ -1010,10 +1051,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- return SVOp->getMaskElt(0) / EltSize;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@@ -1299,7 +1344,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1350,7 +1395,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1497,7 +1542,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1518,7 +1563,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1555,7 +1600,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1646,7 +1691,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1891,7 +1936,8 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0);
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2086,6 +2132,43 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
return ArgSize;
}
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ return Align;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2094,8 +2177,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
@@ -2161,7 +2244,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2184,7 +2268,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- if (PPCSubTarget.hasVSX())
+ if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else
RC = &PPC::F8RCRegClass;
@@ -2240,23 +2324,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
- FI->setMinReservedArea(MinReservedArea);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
@@ -2352,32 +2427,6 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
-// Set the size that is at least reserved in caller of this function. Tail
-// call optimized functions' reserved stack space needs to be aligned so that
-// taking the difference between two stack areas will result in an aligned
-// stack.
-void
-PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea,
- bool isPPC64) const {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
-}
-
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain,
@@ -2388,6 +2437,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2398,9 +2448,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned ArgOffset = LinkageSize;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2422,14 +2471,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
@@ -2442,24 +2490,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
- ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
- if (isVarArg) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else
- nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -2481,14 +2520,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- CurArgOffset = ArgOffset;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize)
+ if (ObjSize < PtrByteSize && !isLittleEndian)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
@@ -2522,7 +2555,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
MemOps.push_back(Store);
- ++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
@@ -2567,8 +2599,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
@@ -2578,18 +2608,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
- // Every 8 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- }
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
&PPC::VSFRCRegClass :
&PPC::F8RCRegClass);
@@ -2608,39 +2633,25 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
++VR_idx;
} else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
needsLoad = true;
}
+ ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
@@ -2649,11 +2660,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(ArgVal);
}
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2667,7 +2683,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by deferencing the
// result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2706,7 +2723,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2997,11 +3015,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(ArgVal);
}
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -3040,75 +3068,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
-/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
-static unsigned
-CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
- bool isPPC64,
- bool isVarArg,
- unsigned CC,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- unsigned &nAltivecParamsAtEnd) {
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
- unsigned NumOps = Outs.size();
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
- ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-
- // Tail call needs the stack to be aligned.
- if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().
- getFrameLowering()->getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- }
-
- return NumBytes;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -3280,7 +3239,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
@@ -3373,10 +3332,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &PPCSubTarget) {
+ const PPCSubtarget &Subtarget) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3385,11 +3344,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
@@ -3398,8 +3358,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
@@ -3422,8 +3382,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3497,8 +3457,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- Callee, InFlag);
+ AddTOC, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
@@ -3613,10 +3575,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget);
+ Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
@@ -3657,7 +3619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
- if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3682,7 +3644,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
InFlag = Chain.getValue(1);
}
@@ -3718,8 +3685,8 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
@@ -3981,6 +3948,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3997,16 +3965,37 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with at least 48 bytes, which
// is reserved space for [SP][CR][LR][3 x unused].
- // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
- // of this call.
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
- Outs, OutVals, nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the callee to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4038,8 +4027,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -4068,6 +4057,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
@@ -4099,15 +4097,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
- "ByVal alignment is not a multiple of the pointer size");
-
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4116,7 +4105,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4124,9 +4113,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (GPR_idx == NumGPRs && Size < 8) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4161,8 +4153,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4172,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4205,7 +4200,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
@@ -4223,7 +4218,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
+ !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4239,15 +4235,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), false, false,
false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
}
- } else if (GPR_idx != NumGPRs)
- // If we have any FPRs remaining, we may also have GPRs remaining.
- ++GPR_idx;
+ }
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4264,21 +4258,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
if (isVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -4309,10 +4295,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
break;
}
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
+ // Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
Arg.getSimpleValueType() == MVT::v2i64) ?
VSRH[VR_idx] : VR[VR_idx];
@@ -4323,12 +4307,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
- ArgOffset += 16;
}
+ ArgOffset += 16;
break;
}
}
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
@@ -4337,19 +4324,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
- // R12 must contain the address of an indirect callee. This does not
- // mean the MTCTR instruction must use R12; it's easier to model this
- // as an extra parameter, so do that.
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4397,15 +4380,55 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it's varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the callee to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4441,7 +4464,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR_32[] = { // 32-bit registers.
@@ -4818,8 +4841,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4842,8 +4865,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5063,12 +5086,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
@@ -5077,8 +5100,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
// Convert the FP value to an int value through memory.
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
@@ -5120,17 +5143,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
- assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
@@ -5146,7 +5169,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
- !PPCSubTarget.hasFPCVT() &&
+ !Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -5184,7 +5207,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
@@ -5201,7 +5224,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
- if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -5220,7 +5243,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
} else {
- assert(PPCSubTarget.isPPC64() &&
+ assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
@@ -5242,7 +5265,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5557,6 +5580,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -5725,6 +5764,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -5733,15 +5773,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true) ||
- PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
return Op;
}
}
@@ -5749,15 +5789,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false) ||
- PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -5791,7 +5831,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
- if (isFourElementShuffle) {
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
@@ -5820,6 +5862,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
@@ -5828,13 +5875,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i32));
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
ResultMask);
- return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
}
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
@@ -6027,6 +6083,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
@@ -6038,13 +6095,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
- // Merge the results together.
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = 2*i+1;
- Ops[i*2+1] = 2*i+1+16;
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
}
- return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
@@ -6064,17 +6132,17 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, PPCSubTarget);
+ return LowerVASTART(Op, DAG, Subtarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, PPCSubTarget);
+ return LowerVAARG(Op, DAG, Subtarget);
case ISD::VACOPY:
- return LowerVACOPY(Op, DAG, PPCSubTarget);
+ return LowerVACOPY(Op, DAG, Subtarget);
- case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -6144,7 +6212,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
- SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
@@ -6255,7 +6323,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6450,7 +6518,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
- if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -6463,12 +6531,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
- BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
- BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
@@ -6492,10 +6560,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
- if (PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
@@ -6607,7 +6675,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
- if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -6645,7 +6713,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8)) {
@@ -6765,13 +6833,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
- BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
- BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
@@ -6862,7 +6930,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
@@ -7070,10 +7138,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -7086,7 +7154,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
@@ -7133,10 +7201,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -7149,7 +7217,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
@@ -7266,10 +7334,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
- for (SDNode::op_iterator O = ChainNext->op_begin(),
- OE = ChainNext->op_end(); O != OE; ++O)
- if (!Visited.count(O->getNode()))
- Queue.push_back(O->getNode());
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
@@ -7312,7 +7379,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(PPCSubTarget.useCRBits() &&
+ assert(Subtarget.useCRBits() &&
"Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
@@ -7610,9 +7677,9 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
return SDValue();
if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- PPCSubTarget.useCRBits()) ||
+ Subtarget.useCRBits()) ||
(N->getOperand(0).getValueType() == MVT::i32 &&
- PPCSubTarget.isPPC64())))
+ Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -7930,8 +7997,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
EVT VT = RV.getValueType();
@@ -8051,6 +8118,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
@@ -8058,25 +8126,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
- // loads into a alignment-based permutation-control instruction (lvsl),
- // a series of regular vector loads (which always truncate their
- // input address to an aligned address), and a series of permutations.
- // The results of these permutations are the requested loaded values.
- // The trick is that the last "extra" load is not taken from the address
- // you might suspect (sizeof(vector) bytes after the last requested
- // load), but rather sizeof(vector) - 1 bytes after the last
- // requested vector. The point of this is to avoid a page fault if the
- // base address happened to be aligned. This works because if the base
- // address is aligned, then adding less than a full vector length will
- // cause the last vector in the sequence to be (re)loaded. Otherwise,
- // the next vector will be fetched as you might suspect was necessary.
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
- DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
// Refine the alignment of the original load (a "new" load created here
// which was identical to the first except for the alignment would be
@@ -8125,8 +8196,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
- SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
- BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
@@ -8151,12 +8232,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
++UI;
SmallVector<SDValue, 8> Ops;
- for (SDNode::op_iterator O = User->op_begin(),
- OE = User->op_end(); O != OE; ++O) {
- if (*O == Use)
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
Ops.push_back(To);
else
- Ops.push_back(*O);
+ Ops.push_back(O);
}
DAG.UpdateNodeOperands(User, Ops);
@@ -8166,9 +8246,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
@@ -8180,8 +8263,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl) {
- // We've found another LVSL, and this address if an aligned
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -8190,6 +8273,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+ }
break;
case ISD::BSWAP:
@@ -8537,11 +8621,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
@@ -8573,7 +8657,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
- if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
@@ -8707,8 +8791,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -8762,8 +8846,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
EVT VT) const {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
(!isPPC64 && VT != MVT::i32))
@@ -8804,7 +8888,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (this->PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
return MVT::i64;
} else {
return MVT::i32;
@@ -8863,7 +8947,7 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return false;
if (VT.getSimpleVT().isVector()) {
- if (PPCSubTarget.hasVSX()) {
+ if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64)
return false;
} else {
@@ -8907,7 +8991,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 080ef5d..df05aa5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -18,7 +18,6 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
-#include "PPCSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -71,19 +70,14 @@ namespace llvm {
TOC_ENTRY,
- /// The following three target-specific nodes are used for calls through
+ /// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Restore the TOC from the TOC save area of the current stack frame.
- /// This is basically a hard coded load instruction which additionally
- /// takes/produces a flag.
- TOC_RESTORE,
-
/// Like a regular LOAD but additionally taking/producing a flag.
LOAD,
- /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
- /// a hard coded load instruction.
+ /// Like LOAD (taking/producing a flag), but using r2 as hard-coded
+ /// destination.
LOAD_TOC,
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
@@ -303,25 +297,27 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -334,7 +330,7 @@ namespace llvm {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -343,8 +339,9 @@ namespace llvm {
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
}
+ class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget &Subtarget;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -613,11 +610,6 @@ namespace llvm {
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
SDValue ArgVal, SDLoc dl) const;
- void
- setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea, bool isPPC64) const;
-
SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index b71c09e..9318f70 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1 in {
-let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
- "ld 2, 8($reg)", IIC_LdStLD,
- [(PPCload_toc i64:$reg)]>, isPPC64;
-
-let RST = 2, DS = 10, RA = 1 in
-def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
- "ld 2, 40(1)", IIC_LdStLD,
- [(PPCtoc_restore)]>, isPPC64;
-}
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
+ "ld 2, $src", IIC_LdStLD,
+ [(PPCload_toc ixaddr:$src)]>, isPPC64;
+
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f3c2eab..dce46d8 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,111 +22,127 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false) != -1;
+ return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true) != -1;
+ return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 7fed2c6..1e4396c 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -360,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
let Inst{30-31} = xo;
}
-class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> RST;
- bits<14> DS;
- bits<5> RA;
-
- let Pattern = pattern;
-
- let Inst{6-10} = RST;
- let Inst{11-15} = RA;
- let Inst{16-29} = DS;
- let Inst{30-31} = xo;
-}
// 1.7.6 X-Form
class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fd72384..9bac91d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -60,23 +61,25 @@ cl::Hidden);
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
-PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl()) {}
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
+ const InstrItineraryData *II =
+ &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -84,17 +87,18 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
- assert(TM.getInstrInfo() && "No InstrInfo?");
+ assert(DAG->TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(TM);
+ return new PPCHazardRecognizer970(*DAG);
}
return new ScoreboardHazardRecognizer(II, DAG);
@@ -129,7 +133,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// On some cores, there is an additional delay between writing to a condition
// register, and using it from a branch.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
switch (Directive) {
default: break;
case PPC::DIR_7400:
@@ -142,6 +146,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
Latency += 2;
break;
}
@@ -313,12 +318,13 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// This function is used for scheduling, and the nop wanted here is the type
// that terminates dispatch groups on the POWER cores.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
unsigned Opcode;
switch (Directive) {
default: Opcode = PPC::NOP; break;
case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
}
DebugLoc DL;
@@ -332,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
@@ -538,7 +544,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// One-way branch.
if (!FBB) {
@@ -579,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!TM.getSubtargetImpl()->hasISEL())
+ if (!Subtarget.hasISEL())
return false;
if (Cond.size() != 2)
@@ -623,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(TM.getSubtargetImpl()->hasISEL() &&
+ assert(Subtarget.hasISEL() &&
"Cannot insert select on target without ISEL support");
// Get the register classes.
@@ -826,7 +832,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
.addReg(SrcReg,
@@ -921,7 +927,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_VRSAVE),
@@ -1035,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
} else {
ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
@@ -1102,7 +1108,7 @@ bool PPCInstrInfo::PredicateInstruction(
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
@@ -1124,7 +1130,7 @@ bool PPCInstrInfo::PredicateInstruction(
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
@@ -1162,7 +1168,7 @@ bool PPCInstrInfo::PredicateInstruction(
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
@@ -1323,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// for equality checks (as those don't depend on the sign). On PPC64,
// we are restricted to equality for unsigned 64-bit comparisons and for
// signed 32-bit comparisons the applicability is more restricted.
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index d9db3e1..83f14c6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,7 @@ enum PPC970_Unit {
class PPCInstrInfo : public PPCGenInstrInfo {
- PPCTargetMachine &TM;
+ PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
bool StoreRegToStackSlot(MachineFunction &MF,
@@ -80,7 +80,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
public:
- explicit PPCInstrInfo(PPCTargetMachine &TM);
+ explicit PPCInstrInfo(PPCSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -89,7 +89,7 @@ public:
const PPCRegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e421f8e..c2e3382 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
-def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 7bbc71b..e5f113a 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -13,7 +13,7 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -25,6 +25,11 @@ using namespace llvm;
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
+ : Subtarget(STI), is64Bit(STI.isPPC64()) {
+ useGOT = 0;
+}
+
#define BUILD_ADDIS(RD,RS,IMM16) \
((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
#define BUILD_ORI(RD,RS,UIMM16) \
@@ -393,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ } else if (Subtarget.isDarwinABI()){
JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 0693e3e..b6b37ff 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -18,32 +18,29 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class PPCTargetMachine;
+class PPCSubtarget;
+class PPCJITInfo : public TargetJITInfo {
+protected:
+ PPCSubtarget &Subtarget;
+ bool is64Bit;
- class PPCJITInfo : public TargetJITInfo {
- protected:
- PPCTargetMachine &TM;
- bool is64Bit;
- public:
- PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
- useGOT = 0;
- is64Bit = tmIs64Bit;
- }
+public:
+ PPCJITInfo(PPCSubtarget &STI);
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
+ unsigned char *GOTBase) override;
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
- };
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
+};
}
#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e333b51..eca774e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -973,6 +973,14 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
Offset += MI.getOperand(OffsetOperandNo).getImm();
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index f742f72..dc16742 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -16,9 +16,7 @@ using namespace llvm;
#define DEBUG_TYPE "powerpc-selectiondag-info"
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
-}
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 341b69c..b2e7f3b 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class PPCTargetMachine;
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ explicit PPCSelectionDAGInfo(const DataLayout *DL);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ea9daee..2e1b74a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -32,15 +32,57 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
- CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel) {
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM,
+ bool is64Bit, CodeGenOpt::Level OptLevel)
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ TLInfo(TM), TSInfo(&DL) {}
+
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void PPCSubtarget::SetJITMode() {
@@ -156,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine endianness.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+
+ // FIXME: For now, we disable VSX in little-endian mode until endian
+ // issues in those instructions can be addressed.
+ if (IsLittleEndian)
+ HasVSX = false;
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -200,6 +247,7 @@ static bool needsAggressiveScheduling(unsigned Directive) {
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
return true;
}
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ee43fd5..2a16699 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,7 +14,13 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -50,6 +56,7 @@ namespace PPC {
DIR_PWR6,
DIR_PWR6X,
DIR_PWR7,
+ DIR_PWR8,
DIR_64
};
}
@@ -102,12 +109,19 @@ protected:
/// OptLevel - What default optimization level we're emitting code for.
CodeGenOpt::Level OptLevel;
+ PPCFrameLowering FrameLowering;
+ const DataLayout DL;
+ PPCInstrInfo InstrInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
+ const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
CodeGenOpt::Level OptLevel);
/// ParseSubtargetFeatures - Parses features string setting specified
@@ -127,10 +141,21 @@ public:
///
unsigned getDarwinDirective() const { return DarwinDirective; }
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ PPCJITInfo *getJITInfo() { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+ /// initializeSubtargetDependencies - Initialize this subtarget from a CPU and
+ /// feature string; returns a reference so it can run in an initializer list.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
/// \brief Reset the features for the PowerPC target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2323add..9563b90 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -37,53 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
- std::string Ret;
-
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
- Ret = "e";
- else
- Ret = "E";
-
- Ret += DataLayout::getManglingComponent(T);
-
- // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
- // pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64Bit, OL),
- DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
- FrameLowering(Subtarget), JITInfo(*this, is64Bit),
- TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
initAsmInfo();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 9e92494..4c7029c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,11 +14,7 @@
#ifndef PPC_TARGETMACHINE_H
#define PPC_TARGETMACHINE_H
-#include "PPCFrameLowering.h"
-#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCJITInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,13 +25,6 @@ namespace llvm {
///
class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- PPCInstrInfo InstrInfo;
- PPCFrameLowering FrameLowering;
- PPCJITInfo JITInfo;
- PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -43,25 +32,29 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
- const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const PPCFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ const PPCInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- PPCJITInfo *getJITInfo() override { return &JITInfo; }
+ const PPCFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const PPCTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
}
- const DataLayout *getDataLayout() const override { return &DL; }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
// Pass Pipeline Configuration