Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 8
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 6
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 16
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 5
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 122
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 3
-rw-r--r--  lib/Target/PowerPC/PPC.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 48
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 297
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 28
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 94
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 87
-rw-r--r--  lib/Target/PowerPC/PPCInstrHTM.td | 172
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 82
-rw-r--r--  lib/Target/PowerPC/PPCInstrQPX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 21
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 170
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 52
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td | 398
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/README.txt | 7
-rw-r--r--  lib/Target/PowerPC/README_ALTIVEC.txt | 104
40 files changed, 1136 insertions(+), 805 deletions(-)
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index bf00e73..99a1633 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -271,9 +271,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(_MII) {
+ PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -425,7 +425,9 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 0ed0723..a9f5fc7 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index c287fbe..311a4f2 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -221,6 +228,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 8 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 6ead19b..8718743 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,9 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2b4f2d8..d8fab5b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+ // v2 abi.
+ NeedsLocalForSize = true;
+
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 06d380e..b9f0afb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -39,10 +40,10 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
- : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
- }
-
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
~PPCMCCodeEmitter() {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -158,14 +159,11 @@ public:
};
} // end anonymous namespace
-
+
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- Triple TT(STI.getTargetTriple());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+ return new PPCMCCodeEmitter(MCII, Ctx);
}
unsigned PPCMCCodeEmitter::
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index f0a6bb9..1c840d9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -36,9 +36,8 @@ private:
int64_t EvaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
- bool _IsDarwin)
- : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
public:
/// @name Construction
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f2da389..2f7a768 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -145,6 +145,7 @@ public:
}
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -222,32 +223,19 @@ public:
};
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- if (Triple(TT).isOSDarwin()) {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetMachOStreamer(*S);
- return S;
- }
-
- MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new PPCTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
@@ -261,60 +249,36 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
- createPPCMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
- createPPCMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
- createPPCMCSubtargetInfo);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
- createPPCMCCodeEmitter);
-
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
- createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 68f7f7a..8b1e3b4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -34,10 +34,9 @@ class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
extern Target ThePPC64LETarget;
-
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f53add5..f175f6d 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -18,7 +18,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//
-
+
//===----------------------------------------------------------------------===//
// CPU Directives //
//===----------------------------------------------------------------------===//
@@ -112,14 +112,21 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
-
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
SubtargetFeature<"invariant-function-descriptors",
"HasInvariantFunctionDescriptors", "true",
"Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -256,11 +263,11 @@ def ProcessorFeatures {
[DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- DeprecatedMFTB, DeprecatedDST];
+ FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
}
def : ProcessorModel<"970", G5Model,
@@ -339,7 +346,7 @@ def : ProcessorModel<"pwr7", P7Model,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
+ Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1327290..cd60906 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -69,12 +69,11 @@ namespace {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget *Subtarget;
- uint64_t TOCLabelID;
StackMaps SM;
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), TOCLabelID(0), SM(*this) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -321,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
-
- // To avoid name clash check if the name already exists.
- while (!TOCEntry) {
- if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == nullptr) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
-
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
return TOCEntry;
}
@@ -1068,8 +1059,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
OutStreamer.EmitValueToAlignment(8);
- MCSymbol *Symbol1 =
- OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
@@ -1082,11 +1072,6 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current.first, Current.second);
-
- MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
- ".L." + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(RealFnSym);
- CurrentFnSymForSize = RealFnSym;
}
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 5af8aab..c595f44 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -171,8 +171,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
@@ -533,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 54532b5..fbd7b6d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -675,8 +675,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined we use it in the store inst,
+ // otherwise we use X0 as base as it makes the vector instructions to
+ // use zero in the computation of the effective address regardless the
+ // content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -1532,7 +1542,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b10e854..3ac8e94 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -186,20 +186,34 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
- // We need to make sure that this one operand does not end up in r0
- // (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(Op), Op.getValueType(),
- Op, RC), 0);
-
- OutOps.push_back(NewOp);
- return false;
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(Op), Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
}
void InsertVRSaveCode(MachineFunction &MF);
@@ -2105,7 +2119,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -2184,6 +2198,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -2192,6 +2208,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -2200,6 +2218,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
break;
default:
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b..871531e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+ // friends. The overhead of a the function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
@@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
-
-/// GetQFPR - Get the set of QPX registers that should be allocated for
-/// arguments.
-static const MCPhysReg *GetQFPR() {
- static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
- };
-
- return QFPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
@@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 1;
+ }
+ else
+ return false;
+ break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 0;
+ }
+ else
+ return false;
+
+ break;
}
return true;
}
@@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
@@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+ // If we support part-word atomic mnemonics, just use them
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
@@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 04afe88..8afd7ef 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -166,14 +166,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
- /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
- /// reserve indexed. This is used to implement atomic operations.
- LARX,
-
- /// STCX = This corresponds to PPC stcx. instrcution: store conditional
- /// indexed. This is used to implement atomic operations.
- STCX,
-
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -489,7 +481,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB, bool is64Bit,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -526,6 +519,21 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 69c0d7d..183d088 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -235,15 +235,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX,
- [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
+
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
-let Defs = [CR0] in
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX,
- [(PPCstcx i64:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -325,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
Requires<[In64BitMode]>;
}
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -696,7 +706,7 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
@@ -752,7 +762,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
@@ -810,7 +820,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f6acd6e..123808b 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,7 +352,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -750,7 +760,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-
+
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@@ -939,8 +949,50 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+} // isCommutable
+
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
// Count Leading Zeros
def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
"vclzb $vD, $vB", IIC_VecGeneral,
@@ -992,4 +1044,42 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vorc $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (or v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 506a2d0..b7a7a1f 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -693,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
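For readers checking the new HTM formats against the ISA, here is a small hand-assembly of the XForm_htm0 layout above (a hedged sketch in plain C++, not LLVM API; the expected word for "tbegin. 0" is derived purely from the bit positions given in the class, using PPC's big-endian bit numbering):

#include <cassert>
#include <cstdint>

int main() {
  // Field values for "tbegin. 0": primary opcode 31, extended opcode 654,
  // R = 0, RC = 1 (the trailing dot), per the XForm_htm0 class above.
  uint32_t Opcode = 31, XO = 654, R = 0, RC = 1;
  uint32_t Inst = (Opcode << 26)    // bits 0-5
                | (R << (31 - 10))  // bit 10
                | (XO << (31 - 30)) // bits 21-30
                | RC;               // bit 31
  assert(Inst == 0x7C00051Du);      // value implied by the layout above
  return 0;
}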
// XX*-Form (VSX)
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1470,6 +1524,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox"
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..20e6a62
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+ return getI32Imm (N->getZExtValue());
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
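As a usage-level illustration of the XORI flip documented in the comment above, here is a minimal, hedged sketch of how the tbegin builtin result is meant to be consumed. It assumes a PowerPC compiler that predeclares the GCC-compatible HTM builtins (e.g. clang or GCC with -mhtm); those builtins are not defined by this patch.

// Hedged sketch only: __builtin_tbegin returns nonzero when the transaction
// started (the pattern above extracts CR0[EQ] and XORs it with 1 to produce
// that convention) and zero when it failed to start.
void update(int *Counter) {
  if (__builtin_tbegin(0)) {
    ++*Counter;          // transactional path
    __builtin_tend(0);
  } else {
    // Fallback path: the transaction did not start; e.g. take a lock here.
  }
}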
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fe9474a..c9c2949 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -61,7 +61,7 @@ void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(STI), RI(STI) {}
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -113,9 +113,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
bool IsRegCR;
- if (TRI->isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI->getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -697,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(Cond[1].getReg(), 0, SubIdx);
}
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
+
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -742,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
}
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+      .addReg(CRReg, getKillRegState(KillSrc));
+ // Rotate the CR bit in the CR fields to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
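To make the RLWINM immediate computed above concrete, a small worked example in plain C++ (not LLVM API) follows; the encoding value 2 for CR2 is the conventional field number and is an assumption here.

#include <cassert>

int main() {
  // Copying CR2[EQ] to a GPR: getCRFromCRBit(CR2EQ) yields CR2 (assumed
  // encoding value 2) and getCRBitValue(CR2EQ) yields 1.
  unsigned CRFieldEncoding = 2;
  unsigned CRBitValue = 1;
  // Position of CR2[EQ] in the 32-bit CR image (big-endian bit numbering).
  unsigned BitPos = CRFieldEncoding * 4 + (3 - CRBitValue); // bit 10
  // Rotate-left amount fed to RLWINM above.
  unsigned Rot = CRFieldEncoding * 4 + (4 - CRBitValue);    // 11
  // Rotating left by Rot lands BitPos in bit 31, which MB = ME = 31 keeps.
  assert((BitPos + 1) % 32 == Rot % 32);
  return 0;
}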
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 4add6f9..7fd076a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -63,7 +63,7 @@ enum PPC970_Unit {
};
} // end namespace PPCII
-
+class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1a045b1..5eff156 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClarx : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-def SDT_PPCstcx : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Instructions to support atomic operations
-def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
- [SDNPHasChain, SDNPMayLoad]>;
-def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
- [SDNPHasChain, SDNPMayStore]>;
-
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -445,6 +432,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -455,6 +454,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
def PPCU4ImmAsmOperand : AsmOperandClass {
let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
let RenderMethod = "addImmOperands";
@@ -715,7 +723,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
-
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
@@ -1446,15 +1454,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX,
- [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
-let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX,
- [(PPCstcx i32:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
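For context on what the part-word definitions above enable, a hedged usage sketch follows; it assumes a powerpc64 target with -mcpu=power8, where HasPartwordAtomics holds, and only illustrates the kind of operation these instructions serve.

#include <atomic>

std::atomic<unsigned char> Flag{0};

// With part-word atomics available, a byte-sized RMW like this can be
// selected to an lbarx/stbcx. retry loop on the byte itself; without them
// the backend must widen it to a containing word using lwarx/stwcx.
unsigned char bump() {
  return Flag.fetch_add(1, std::memory_order_relaxed);
}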
@@ -1473,7 +1510,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
//
// Unindexed (r+i) Loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
@@ -1570,7 +1607,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -2683,6 +2720,7 @@ include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index c984d46..5c66b42 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -501,7 +501,7 @@ let Uses = [RM] in {
"qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def QVLFDX : XForm_1<31, 583,
(outs qfrc:$FRT), (ins memrr:$src),
"qvlfdx $FRT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index d6cb3a0..ec04da4 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -66,7 +66,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index efd2d92..005bcaf 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -104,7 +104,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref
bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DL = F.getParent()->getDataLayout();
+ DL = &F.getParent()->getDataLayout();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -192,7 +192,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
if (const SCEVConstant *ConstPtrDiff =
dyn_cast<SCEVConstant>(PtrDiff)) {
- int64_t PD = abs64(ConstPtrDiff->getValue()->getSExtValue());
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
if (PD < (int64_t) CacheLineSize) {
DupPref = true;
break;
@@ -211,7 +211,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
- SCEVExpander SCEVE(*SE, "prefaddr");
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
IRBuilder<> Builder(MemI);
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index df65227..092a4ef 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
@@ -84,7 +85,6 @@ namespace {
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
};
}
@@ -141,9 +141,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
-
bool MadeChange = false;
for (LoopInfo::iterator I = LI->begin(), E = LI->end();
@@ -158,9 +155,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
- if (!DL)
- return MadeChange;
-
// Only prep. the inner-most loop
if (!L->empty())
return MadeChange;
@@ -261,6 +255,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
BasePtr->getType()->getPointerAddressSpace());
@@ -280,7 +275,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
- SCEVExpander SCEVE(*SE, "pistart");
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
LoopPredecessor->getTerminator());
@@ -295,8 +290,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
Instruction *InsPoint = Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc =
- GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(),
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -341,9 +336,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
PtrIP = PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
PtrIP = I->second;
-
- GetElementPtrInst *NewPtr =
- GetElementPtrInst::Create(PtrInc, Diff->getValue(),
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, Diff->getValue(),
I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 819738b..0965cb3 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -184,6 +184,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index c9a9684..0e568d3 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -56,11 +57,11 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
- : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
- ST.isPPC64() ? 0 : 1,
- ST.isPPC64() ? 0 : 1),
- Subtarget(ST) {
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
// Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RC_NOX0RegClass;
return &PPC::GPRC_NOR0RegClass;
}
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
}
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
@@ -108,28 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_SaveList :
- CSR_Darwin64_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_SaveList :
- CSR_Darwin32_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList :
- CSR_SVR464_Altivec_SaveList) :
- (SaveR2 ? CSR_SVR464_R2_SaveList :
- CSR_SVR464_SaveList)) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_SaveList :
- CSR_SVR432_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
}
-const uint32_t*
-PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
@@ -139,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_RegMask :
- CSR_Darwin64_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_RegMask :
- CSR_Darwin32_RegMask);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_RegMask :
- CSR_SVR464_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_RegMask :
- CSR_SVR432_RegMask);
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
}
const uint32_t*
@@ -160,15 +158,13 @@ PPCRegisterInfo::getNoPreservedMask() const {
}
void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
- unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
- for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
- unsigned Reg = PseudoRegs[i];
- Mask[Reg / 32] &= ~(1u << (Reg % 32));
- }
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
@@ -207,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (Subtarget.isPPC64()) {
+ if (TM.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::X1);
@@ -238,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R29);
else
Reserved.set(PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -260,6 +256,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const unsigned DefaultSafety = 1;
@@ -291,8 +288,10 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
- const TargetRegisterClass *RC) const {
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (Subtarget.hasVSX()) {
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
@@ -303,7 +302,7 @@ const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
return &PPC::VSRCRegClass;
}
- return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
//===----------------------------------------------------------------------===//
@@ -326,10 +325,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Determine whether 64-bit pointers are used.
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
@@ -443,10 +443,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -487,10 +488,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -522,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-static unsigned getCRFromCRBit(unsigned SrcReg) {
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- assert(Reg != 0 && "Invalid CR bit register");
- return Reg;
-}
-
void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -560,10 +531,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -603,10 +575,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -650,6 +623,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -675,6 +649,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -697,14 +672,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we have
// previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
FrameIdx = 0;
else {
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -757,6 +732,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
@@ -847,7 +823,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- bool is64Bit = Subtarget.isPPC64();
+ bool is64Bit = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -885,23 +861,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- if (!Subtarget.isPPC64())
+ if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return PPC::X30;
if (Subtarget.isSVR4ABI() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ TM.getRelocationModel() == Reloc::PIC_)
return PPC::R29;
return PPC::R30;
@@ -927,6 +905,7 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
@@ -964,7 +943,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
unsigned StackEst =
@@ -983,7 +962,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The frame pointer will point to the end of the stack, so estimate the
// offset as the difference between the object offset and the FP location.
- return !isFrameOffsetLegal(MI, Offset);
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
}
/// Insert defining instruction(s) for BaseReg to
@@ -992,7 +971,7 @@ void PPCRegisterInfo::
materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -1000,6 +979,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
@@ -1025,6 +1005,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1033,6 +1014,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef90..d304e1d 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -22,15 +22,44 @@
#include "PPCGenRegisterInfo.inc"
namespace llvm {
-class PPCSubtarget;
-class TargetInstrInfo;
-class Type;
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
- const PPCSubtarget &Subtarget;
+ const PPCTargetMachine &TM;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget);
+ PPCRegisterInfo(const PPCTargetMachine &TM);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -40,13 +69,14 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -97,7 +127,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 9a7df96..6ca68ed 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 2f3a1f9..d0954a1 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -124,401 +124,3 @@ include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction to itinerary class map - When add new opcodes to the supported
-// set, refer to the following table to determine which itinerary class the
-// opcode belongs.
-//
-// opcode itinerary class
-// ====== ===============
-// add IIC_IntSimple
-// addc IIC_IntGeneral
-// adde IIC_IntGeneral
-// addi IIC_IntSimple
-// addic IIC_IntGeneral
-// addic. IIC_IntGeneral
-// addis IIC_IntSimple
-// addme IIC_IntGeneral
-// addze IIC_IntGeneral
-// and IIC_IntSimple
-// andc IIC_IntSimple
-// andi. IIC_IntGeneral
-// andis. IIC_IntGeneral
-// b IIC_BrB
-// bc IIC_BrB
-// bcctr IIC_BrB
-// bclr IIC_BrB
-// cmp IIC_IntCompare
-// cmpi IIC_IntCompare
-// cmpl IIC_IntCompare
-// cmpli IIC_IntCompare
-// cntlzd IIC_IntRotateD
-// cntlzw IIC_IntGeneral
-// crand IIC_BrCR
-// crandc IIC_BrCR
-// creqv IIC_BrCR
-// crnand IIC_BrCR
-// crnor IIC_BrCR
-// cror IIC_BrCR
-// crorc IIC_BrCR
-// crxor IIC_BrCR
-// dcba IIC_LdStDCBA
-// dcbf IIC_LdStDCBF
-// dcbi IIC_LdStDCBI
-// dcbst IIC_LdStDCBF
-// dcbt IIC_LdStLoad
-// dcbtst IIC_LdStLoad
-// dcbz IIC_LdStDCBF
-// divd IIC_IntDivD
-// divdu IIC_IntDivD
-// divw IIC_IntDivW
-// divwu IIC_IntDivW
-// dss IIC_LdStDSS
-// dst IIC_LdStDSS
-// dstst IIC_LdStDSS
-// eciwx IIC_LdStLoad
-// ecowx IIC_LdStLoad
-// eieio IIC_LdStLoad
-// eqv IIC_IntSimple
-// extsb IIC_IntSimple
-// extsh IIC_IntSimple
-// extsw IIC_IntSimple
-// fabs IIC_FPGeneral
-// fadd IIC_FPAddSub
-// fadds IIC_FPGeneral
-// fcfid IIC_FPGeneral
-// fcmpo IIC_FPCompare
-// fcmpu IIC_FPCompare
-// fctid IIC_FPGeneral
-// fctidz IIC_FPGeneral
-// fctiw IIC_FPGeneral
-// fctiwz IIC_FPGeneral
-// fdiv IIC_FPDivD
-// fdivs IIC_FPDivS
-// fmadd IIC_FPFused
-// fmadds IIC_FPGeneral
-// fmr IIC_FPGeneral
-// fmsub IIC_FPFused
-// fmsubs IIC_FPGeneral
-// fmul IIC_FPFused
-// fmuls IIC_FPGeneral
-// fnabs IIC_FPGeneral
-// fneg IIC_FPGeneral
-// fnmadd IIC_FPFused
-// fnmadds IIC_FPGeneral
-// fnmsub IIC_FPFused
-// fnmsubs IIC_FPGeneral
-// fres IIC_FPRes
-// frsp IIC_FPGeneral
-// frsqrte IIC_FPGeneral
-// fsel IIC_FPGeneral
-// fsqrt IIC_FPSqrtD
-// fsqrts IIC_FPSqrtS
-// fsub IIC_FPAddSub
-// fsubs IIC_FPGeneral
-// icbi IIC_LdStICBI
-// isel IIC_IntISEL
-// isync IIC_SprISYNC
-// lbz IIC_LdStLoad
-// lbzu IIC_LdStLoadUpd
-// lbzux IIC_LdStLoadUpdX
-// lbzx IIC_LdStLoad
-// ld IIC_LdStLD
-// ldarx IIC_LdStLDARX
-// ldu IIC_LdStLDU
-// ldux IIC_LdStLDUX
-// ldx IIC_LdStLD
-// lfd IIC_LdStLFD
-// lfdu IIC_LdStLFDU
-// lfdux IIC_LdStLFDUX
-// lfdx IIC_LdStLFD
-// lfs IIC_LdStLFD
-// lfsu IIC_LdStLFDU
-// lfsux IIC_LdStLFDUX
-// lfsx IIC_LdStLFD
-// lha IIC_LdStLHA
-// lhau IIC_LdStLHAU
-// lhaux IIC_LdStLHAUX
-// lhax IIC_LdStLHA
-// lhbrx IIC_LdStLoad
-// lhz IIC_LdStLoad
-// lhzu IIC_LdStLoadUpd
-// lhzux IIC_LdStLoadUpdX
-// lhzx IIC_LdStLoad
-// lmw IIC_LdStLMW
-// lswi IIC_LdStLMW
-// lswx IIC_LdStLMW
-// lvebx IIC_LdStLVecX
-// lvehx IIC_LdStLVecX
-// lvewx IIC_LdStLVecX
-// lvsl IIC_LdStLVecX
-// lvsr IIC_LdStLVecX
-// lvx IIC_LdStLVecX
-// lvxl IIC_LdStLVecX
-// lwa IIC_LdStLWA
-// lwarx IIC_LdStLWARX
-// lwaux IIC_LdStLHAUX
-// lwax IIC_LdStLHA
-// lwbrx IIC_LdStLoad
-// lwz IIC_LdStLoad
-// lwzu IIC_LdStLoadUpd
-// lwzux IIC_LdStLoadUpdX
-// lwzx IIC_LdStLoad
-// mcrf IIC_BrMCR
-// mcrfs IIC_FPGeneral
-// mcrxr IIC_BrMCRX
-// mfcr IIC_SprMFCR
-// mffs IIC_IntMFFS
-// mfmsr IIC_SprMFMSR
-// mfspr IIC_SprMFSPR
-// mfsr IIC_SprMFSR
-// mfsrin IIC_SprMFSR
-// mftb IIC_SprMFTB
-// mfvscr IIC_IntMFVSCR
-// mtcrf IIC_BrMCRX
-// mtfsb0 IIC_IntMTFSB0
-// mtfsb1 IIC_IntMTFSB0
-// mtfsf IIC_IntMTFSB0
-// mtfsfi IIC_IntMTFSB0
-// mtmsr IIC_SprMTMSR
-// mtmsrd IIC_LdStLD
-// mtspr IIC_SprMTSPR
-// mtsr IIC_SprMTSR
-// mtsrd IIC_IntMTSRD
-// mtsrdin IIC_IntMTSRD
-// mtsrin IIC_SprMTSRIN
-// mtvscr IIC_IntMFVSCR
-// mulhd IIC_IntMulHD
-// mulhdu IIC_IntMulHD
-// mulhw IIC_IntMulHW
-// mulhwu IIC_IntMulHWU
-// mulld IIC_IntMulHD
-// mulli IIC_IntMulLI
-// mullw IIC_IntMulHW
-// nand IIC_IntSimple
-// neg IIC_IntSimple
-// nor IIC_IntSimple
-// or IIC_IntSimple
-// orc IIC_IntSimple
-// ori IIC_IntSimple
-// oris IIC_IntSimple
-// rfi IIC_SprRFI
-// rfid IIC_IntRFID
-// rldcl IIC_IntRotateD
-// rldcr IIC_IntRotateD
-// rldic IIC_IntRotateDI
-// rldicl IIC_IntRotateDI
-// rldicr IIC_IntRotateDI
-// rldimi IIC_IntRotateDI
-// rlwimi IIC_IntRotate
-// rlwinm IIC_IntGeneral
-// rlwnm IIC_IntGeneral
-// sc IIC_SprSC
-// slbia IIC_LdStSLBIA
-// slbie IIC_LdStSLBIE
-// sld IIC_IntRotateD
-// slw IIC_IntGeneral
-// srad IIC_IntRotateD
-// sradi IIC_IntRotateDI
-// sraw IIC_IntShift
-// srawi IIC_IntShift
-// srd IIC_IntRotateD
-// srw IIC_IntGeneral
-// stb IIC_LdStStore
-// stbu IIC_LdStStoreUpd
-// stbux IIC_LdStStoreUpd
-// stbx IIC_LdStStore
-// std IIC_LdStSTD
-// stdcx. IIC_LdStSTDCX
-// stdu IIC_LdStSTDU
-// stdux IIC_LdStSTDUX
-// stdx IIC_LdStSTD
-// stfd IIC_LdStSTFD
-// stfdu IIC_LdStSTFDU
-// stfdux IIC_LdStSTFDU
-// stfdx IIC_LdStSTFD
-// stfiwx IIC_LdStSTFD
-// stfs IIC_LdStSTFD
-// stfsu IIC_LdStSTFDU
-// stfsux IIC_LdStSTFDU
-// stfsx IIC_LdStSTFD
-// sth IIC_LdStStore
-// sthbrx IIC_LdStStore
-// sthu IIC_LdStStoreUpd
-// sthux IIC_LdStStoreUpd
-// sthx IIC_LdStStore
-// stmw IIC_LdStLMW
-// stswi IIC_LdStLMW
-// stswx IIC_LdStLMW
-// stvebx IIC_LdStSTVEBX
-// stvehx IIC_LdStSTVEBX
-// stvewx IIC_LdStSTVEBX
-// stvx IIC_LdStSTVEBX
-// stvxl IIC_LdStSTVEBX
-// stw IIC_LdStStore
-// stwbrx IIC_LdStStore
-// stwcx. IIC_LdStSTWCX
-// stwu IIC_LdStStoreUpd
-// stwux IIC_LdStStoreUpd
-// stwx IIC_LdStStore
-// subf IIC_IntGeneral
-// subfc IIC_IntGeneral
-// subfe IIC_IntGeneral
-// subfic IIC_IntGeneral
-// subfme IIC_IntGeneral
-// subfze IIC_IntGeneral
-// sync IIC_LdStSync
-// td IIC_IntTrapD
-// tdi IIC_IntTrapD
-// tlbia IIC_LdStSLBIA
-// tlbie IIC_LdStDCBF
-// tlbsync IIC_SprTLBSYNC
-// tw IIC_IntTrapW
-// twi IIC_IntTrapW
-// vaddcuw IIC_VecGeneral
-// vaddfp IIC_VecFP
-// vaddsbs IIC_VecGeneral
-// vaddshs IIC_VecGeneral
-// vaddsws IIC_VecGeneral
-// vaddubm IIC_VecGeneral
-// vaddubs IIC_VecGeneral
-// vadduhm IIC_VecGeneral
-// vadduhs IIC_VecGeneral
-// vadduwm IIC_VecGeneral
-// vadduws IIC_VecGeneral
-// vand IIC_VecGeneral
-// vandc IIC_VecGeneral
-// vavgsb IIC_VecGeneral
-// vavgsh IIC_VecGeneral
-// vavgsw IIC_VecGeneral
-// vavgub IIC_VecGeneral
-// vavguh IIC_VecGeneral
-// vavguw IIC_VecGeneral
-// vcfsx IIC_VecFP
-// vcfux IIC_VecFP
-// vcmpbfp IIC_VecFPCompare
-// vcmpeqfp IIC_VecFPCompare
-// vcmpequb IIC_VecGeneral
-// vcmpequh IIC_VecGeneral
-// vcmpequw IIC_VecGeneral
-// vcmpgefp IIC_VecFPCompare
-// vcmpgtfp IIC_VecFPCompare
-// vcmpgtsb IIC_VecGeneral
-// vcmpgtsh IIC_VecGeneral
-// vcmpgtsw IIC_VecGeneral
-// vcmpgtub IIC_VecGeneral
-// vcmpgtuh IIC_VecGeneral
-// vcmpgtuw IIC_VecGeneral
-// vctsxs IIC_VecFP
-// vctuxs IIC_VecFP
-// vexptefp IIC_VecFP
-// vlogefp IIC_VecFP
-// vmaddfp IIC_VecFP
-// vmaxfp IIC_VecFPCompare
-// vmaxsb IIC_VecGeneral
-// vmaxsh IIC_VecGeneral
-// vmaxsw IIC_VecGeneral
-// vmaxub IIC_VecGeneral
-// vmaxuh IIC_VecGeneral
-// vmaxuw IIC_VecGeneral
-// vmhaddshs IIC_VecComplex
-// vmhraddshs IIC_VecComplex
-// vminfp IIC_VecFPCompare
-// vminsb IIC_VecGeneral
-// vminsh IIC_VecGeneral
-// vminsw IIC_VecGeneral
-// vminub IIC_VecGeneral
-// vminuh IIC_VecGeneral
-// vminuw IIC_VecGeneral
-// vmladduhm IIC_VecComplex
-// vmrghb IIC_VecPerm
-// vmrghh IIC_VecPerm
-// vmrghw IIC_VecPerm
-// vmrglb IIC_VecPerm
-// vmrglh IIC_VecPerm
-// vmrglw IIC_VecPerm
-// vmsubfp IIC_VecFP
-// vmsummbm IIC_VecComplex
-// vmsumshm IIC_VecComplex
-// vmsumshs IIC_VecComplex
-// vmsumubm IIC_VecComplex
-// vmsumuhm IIC_VecComplex
-// vmsumuhs IIC_VecComplex
-// vmulesb IIC_VecComplex
-// vmulesh IIC_VecComplex
-// vmuleub IIC_VecComplex
-// vmuleuh IIC_VecComplex
-// vmulosb IIC_VecComplex
-// vmulosh IIC_VecComplex
-// vmuloub IIC_VecComplex
-// vmulouh IIC_VecComplex
-// vnor IIC_VecGeneral
-// vor IIC_VecGeneral
-// vperm IIC_VecPerm
-// vpkpx IIC_VecPerm
-// vpkshss IIC_VecPerm
-// vpkshus IIC_VecPerm
-// vpkswss IIC_VecPerm
-// vpkswus IIC_VecPerm
-// vpkuhum IIC_VecPerm
-// vpkuhus IIC_VecPerm
-// vpkuwum IIC_VecPerm
-// vpkuwus IIC_VecPerm
-// vrefp IIC_VecFPRound
-// vrfim IIC_VecFPRound
-// vrfin IIC_VecFPRound
-// vrfip IIC_VecFPRound
-// vrfiz IIC_VecFPRound
-// vrlb IIC_VecGeneral
-// vrlh IIC_VecGeneral
-// vrlw IIC_VecGeneral
-// vrsqrtefp IIC_VecFP
-// vsel IIC_VecGeneral
-// vsl IIC_VecVSL
-// vslb IIC_VecGeneral
-// vsldoi IIC_VecPerm
-// vslh IIC_VecGeneral
-// vslo IIC_VecPerm
-// vslw IIC_VecGeneral
-// vspltb IIC_VecPerm
-// vsplth IIC_VecPerm
-// vspltisb IIC_VecPerm
-// vspltish IIC_VecPerm
-// vspltisw IIC_VecPerm
-// vspltw IIC_VecPerm
-// vsr IIC_VecVSR
-// vsrab IIC_VecGeneral
-// vsrah IIC_VecGeneral
-// vsraw IIC_VecGeneral
-// vsrb IIC_VecGeneral
-// vsrh IIC_VecGeneral
-// vsro IIC_VecPerm
-// vsrw IIC_VecGeneral
-// vsubcuw IIC_VecGeneral
-// vsubfp IIC_VecFP
-// vsubsbs IIC_VecGeneral
-// vsubshs IIC_VecGeneral
-// vsubsws IIC_VecGeneral
-// vsububm IIC_VecGeneral
-// vsububs IIC_VecGeneral
-// vsubuhm IIC_VecGeneral
-// vsubuhs IIC_VecGeneral
-// vsubuwm IIC_VecGeneral
-// vsubuws IIC_VecGeneral
-// vsum2sws IIC_VecComplex
-// vsum4sbs IIC_VecComplex
-// vsum4shs IIC_VecComplex
-// vsum4ubs IIC_VecComplex
-// vsumsws IIC_VecComplex
-// vupkhpx IIC_VecPerm
-// vupkhsb IIC_VecPerm
-// vupkhsh IIC_VecPerm
-// vupklpx IIC_VecPerm
-// vupklsb IIC_VecPerm
-// vupklsh IIC_VecPerm
-// vxor IIC_VecGeneral
-// xor IIC_IntSimple
-// xori IIC_IntSimple
-// xoris IIC_IntSimple
-//
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c91428d..ed88803 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
+ HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -94,7 +95,9 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
IsQPXStackUnaligned = false;
+ HasHTM = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 247a96d..b4c1bb1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -90,6 +90,7 @@ protected:
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
+ bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -113,6 +114,8 @@ protected:
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@@ -218,6 +221,7 @@ public:
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
@@ -234,6 +238,7 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
@@ -242,6 +247,7 @@ public:
return 16;
}
+ bool hasHTM() const { return HasHTM; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b219e93..7267529 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -160,11 +160,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
- CM, OL),
+ : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- TargetABI(computeTargetABI(Triple(TT), Options)),
- DL(getDataLayoutString(Triple(TT))), Subtarget(TT, CPU, TargetFS, *this) {
+ TargetABI(computeTargetABI(Triple(TT), Options)) {
initAsmInfo();
}
@@ -208,7 +207,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+ // FIXME: It would be good if the subtarget feature additions here were
+ // not necessary. Anything that turns them on/off (overrides) ends up
+ // appended to the end of the feature string, but the defaults should not
+ // require adding them. Fixing this means pulling Feature64Bit out of most
+ // of the target CPUs in the .td file and setting it only as part of
+ // initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
}
return I.get();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6508484..7a49058 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,10 +29,6 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- // Calculates type size & alignment
- const DataLayout DL;
- PPCSubtarget Subtarget;
-
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
@@ -42,8 +38,6 @@ public:
~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 073bbb0..b46acd4 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
BaseT::getUnrollingPreferences(L, UP);
}
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ return LoopHasReductions;
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index cef7079..21acea1 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -78,6 +78,7 @@ public:
/// \name Vector TTI Implementations
/// @{
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor();
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 4132b04..dfe988f 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -621,3 +621,10 @@ void foo() {
bar(x);
__asm__("" ::: "cr2");
}
+
+//===----------------------------------------------------------------------===//
+
+Instruction fusion was introduced in ISA 2.06, and more opportunities were
+added in ISA 2.07. LLVM needs infrastructure to recognize fusion opportunities
+and force such instruction pairs to be scheduled together.
+
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 1e4c6fb..43d87d3 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -209,3 +209,107 @@ vector float f(vector float a, vector float b) {
return b;
}
+//===----------------------------------------------------------------------===//
+
+We should do a little better at eliminating dead stores.
+The stores to the stack below are dead, since %a and %b are not needed:
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+ entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>* %a, align 16
+ %1 = load <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+}
+
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+
+Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
+# BB#0: # %entry
+ addis 3, 2, .LCPI0_0@toc@ha
+ addis 4, 2, .LCPI0_1@toc@ha
+ addi 3, 3, .LCPI0_0@toc@l
+ addi 4, 4, .LCPI0_1@toc@l
+ lxvw4x 0, 0, 3
+ addi 3, 1, -16
+ lxvw4x 35, 0, 4
+ stxvw4x 0, 0, 3
+ ori 2, 2, 0
+ lxvw4x 34, 0, 3
+ addi 3, 1, -32
+ stxvw4x 35, 0, 3
+ vpmsumb 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+
+The two stxvw4x instructions are not needed.
+With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
+are present too.
+
+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+ ret <2 x i64> %result
+}
+
+This will generate the following instruction sequence:
+ std 5, -8(1)
+ std 5, -16(1)
+ addi 3, 1, -16
+ ori 2, 2, 0
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+
+This will almost certainly cause a load-hit-store hazard.
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done to set up the vector register. Instead,
+it would be better to splat the value into a vector register, and then
+remove the (dead) stores to the stack.
+
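+As a rough sketch (hypothetical IR, not taken from the test above), the two
+insertelement operations are equivalent to the canonical splat idiom, which
+the backend could recognize and lower without the stack round trip:
+
+; Hypothetical rewrite, for illustration only.
+define <2 x i64> @increment_by_val_splat(<2 x i64> %x, i64 %val) nounwind {
+  %tmp = insertelement <2 x i64> undef, i64 %val, i32 0
+  %splat = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
+  %result = add <2 x i64> %x, %splat
+  ret <2 x i64> %result
+}
+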
+//===----------------------------------------------------------------------===//
+
+At the moment we always generate lxsdx in preference to lfd, and stxsdx in
+preference to stfd. When a reg+immediate addressing mode is available, this is
+a poor choice, since we have to load the address into an index register. This
+should be fixed for P7/P8.
+
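+A minimal, hypothetical example of the case in question:
+
+; Hypothetical example, for illustration only.
+define double @load_at_offset(double* %p) {
+entry:
+  %addr = getelementptr inbounds double* %p, i64 2
+  %v = load double* %addr, align 8
+  ret double %v
+}
+
+Here lfd could consume the 16(3) D-form address directly, whereas lxsdx first
+needs the offset materialized into an index register.
+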
+//===----------------------------------------------------------------------===//
+
+Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
+However, we could actually support both kinds on either endianness if we
+checked for the appropriate shufflevector pattern in each case; this would let
+some additional shufflevectors be recognized and implemented via the
+"swapped" form.
+
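+For example (a sketch, not taken from an existing test), an even-element
+interleave such as
+
+  %m = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+
+matches a vmrghw-style pattern directly on BE; on LE the same mask could
+presumably be matched to the corresponding merge-low instruction with the
+operands swapped.
+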
+//===----------------------------------------------------------------------===//
+
+There is a utility program called PerfectShuffle that generates a table of the
+shortest instruction sequence for implementing a shufflevector operation on
+PowerPC. However, this was designed for big-endian code generation. We could
+modify this program to create a little-endian version of the table. The table
+is used in PPCISelLowering.cpp, in PPCTargetLowering::LowerVECTOR_SHUFFLE().
+
+//===----------------------------------------------------------------------===//
+
+Opportunities to use instructions from PPCInstrVSX.td during code gen:
+ - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
+ - Scalar comparisons (xscmpodp and xscmpudp)
+ - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
+
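+As one (hypothetical) example of the min/max item above, a scalar maximum
+written as compare-plus-select could be selected to xsmaxdp, modulo a check
+of the NaN semantics:
+
+; Hypothetical example, for illustration only.
+define double @fmax_via_select(double %a, double %b) {
+entry:
+  %cmp = fcmp ogt double %a, %b
+  %r = select i1 %cmp, double %a, double %b
+  ret double %r
+}
+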
+Related to this: we currently do not generate the lxvw4x instruction for either
+v4f32 or v4i32, probably because adding a DAG pattern to the recognizer
+requires a single target type. This should probably be addressed in the
+PPCISelDAGToDAG logic.