aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp52
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h43
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/PPC.td77
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp19
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp31
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp150
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td36
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td64
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td25
-rw-r--r--lib/Target/PowerPC/PPCLoopDataPrefetch.cpp9
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp39
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp9
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--lib/Target/PowerPC/README.txt19
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt2
28 files changed, 438 insertions, 205 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 99a1633..90ab7a5 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
}
}
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a9f5fc7..5cbf3d9 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler {
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~PPCDisassembler() {}
+ ~PPCDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 311a4f2..1576544 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8718743..eca37eb 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,7 +32,8 @@ public:
}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bea88a2..420c5c8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -208,7 +208,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -224,8 +224,7 @@ namespace {
ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index b817394..3e3489f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index b9f0afb..725b47b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -44,7 +44,7 @@ public:
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- ~PPCMCCodeEmitter() {}
+ ~PPCMCCodeEmitter() override {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2f7a768..423e427 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new PPCTargetMachOStreamer(S);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
- return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
}
extern "C" void LLVMInitializePowerPCTargetMC() {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 8b1e3b4..5f2117c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -18,6 +18,7 @@
#undef PPC
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
class MCAsmBackend;
@@ -29,6 +30,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target ThePPC32Target;
@@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number of
+/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since its 1s are not all contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index f7259b9..44e69b7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(
new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f175f6d..1a02bcc 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
@@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
"HasPartwordAtomics", "true",
"Enable l[bh]arx and st[bh]cx.">;
@@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+/* Since new processors generally contain a superset of features of those that
+ came before them, the idea is to make implementations of new processors
+ less error prone and easier to read.
+ Namely:
+ list<SubtargetFeature> Power8FeatureList = ...
+ list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+ [ features that Power8 does not support ]
+ list<SubtargetFeature> FutureProcessorFeatureList =
+ !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+ Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
+ well as providing a single point of definition if the feature set will be
+ used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ DeprecatedMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
@@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
-/* Since new processors generally contain a superset of features of those that
- came before them, the idea is to make implementations of new processors
- less error prone and easier to read.
- Namely:
- list<SubtargetFeature> Power8FeatureList = ...
- list<SubtargetFeature> FutureProcessorSpecificFeatureList =
- [ features that Power8 does not support ]
- list<SubtargetFeature> FutureProcessorFeatureList =
- !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
-
- Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
- well as providing a single point of definition if the feature set will be
- used elsewhere.
-
-*/
-def ProcessorFeatures {
- list<SubtargetFeature> Power8FeatureList =
- [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
- FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
- FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
- Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
-}
-
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
@@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
- DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index cd60906..383a1e2 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
- MachineModuleInfoELF &MMIELF =
- MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
- }
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
return AsmPrinter::doFinalization(M);
}
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index fbd7b6d..002616b 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
@@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
}
// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
@@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3ac8e94..4f8d01b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -105,13 +105,6 @@ namespace {
return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
}
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
- /// with any number of 0s on either side. The 1s are allowed to wrap from
- /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
- /// 0x0F0F0000 is not, since all 1s are not contiguous.
- static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
-
-
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
@@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
getSmallIPtrImm(Offset));
}
-bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
- if (!Val)
- return false;
-
- if (isShiftedMask_32(Val)) {
- // look for the first non-zero bit
- MB = countLeadingZeros(Val);
- // look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
- return true;
- } else {
- Val = ~Val; // invert mask
- if (isShiftedMask_32(Val)) {
- // effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
- // effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
- return true;
- }
- }
- // no run present
- return false;
-}
-
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 871531e..4c0b6a6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-/// isAllNegativeZeroVector - Returns true if all elements of build_vector
-/// are -0.0.
-bool PPC::isAllNegativeZeroVector(SDNode *N) {
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
-
- APInt APVal, APUndef;
- unsigned BitSize;
- bool HasAnyUndefs;
-
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- return CFP->getValueAPF().isNegZero();
-
- return false;
-}
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
// 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, MVT::i32), 8, false, true,
+ DAG.getConstant(12, MVT::i32), 8, false, true, false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(),
+ false, false, false, MachinePointerInfo(),
MachinePointerInfo());
}
@@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
@@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+
+ return FP;
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
+ // If we have direct moves, we can do the entire conversion and skip the
+ // store/load; however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
+
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
@@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // The remaining cases assume either big endian element order or
- // a splat-size that equates to the element size of the vector
- // to be built. An example that doesn't work for little endian is
- // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
- // and a vector element size of 16 bits. The code below will
- // produce the vector in big endian element order, which for little
- // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
-
- // For now, just avoid these optimizations in that case.
- // FIXME: Develop correct optimizations for LE with mismatched
- // splat and element sizes.
-
- if (Subtarget.isLittleEndian() &&
- SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
- return SDValue();
-
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
@@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- const Function *F = MF.getFunction();
- // When expanding a memset, require at least two QPX instructions to cover
- // the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
- (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
- return MVT::v4f64;
- }
-
- // We should use Altivec/VSX loads and stores when available. For unaligned
- // addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Size >= 16 &&
- (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
- ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
- return MVT::v4i32;
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
if (Subtarget.isPPC64()) {
return MVT::i64;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 8afd7ef..7e2ebd4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -119,6 +119,15 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -368,10 +377,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
- /// isAllNegativeZeroVector - Returns true if all elements of build_vector
- /// are -0.0.
- bool isAllNegativeZeroVector(SDNode *N);
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
@@ -649,6 +654,10 @@ namespace llvm {
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 183d088..d1d67cb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
@@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index b7a7a1f..43c2158 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 5eff156..8aecb65 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+// Multiclass for instructions for which the non-record form is not cracked
+// and the record form is cracked (e.g. divw, mullw).
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
+
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index ec04da4..a98e58f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
when the elements are larger than i32.
*/
def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let isCommutable = 1 in {
@@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170,
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
} // AddedComplexity = 500
} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// VSX direct move instructions
+def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index 005bcaf..2947c66 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "ppc-loop-data-prefetch"
#include "PPC.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 092a4ef..b6e7799 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "ppc-loop-preinc-prep"
#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
@@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!L->empty())
return MadeChange;
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
BasicBlock *Header = L->getHeader();
const PPCSubtarget *ST =
TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
- unsigned HeaderLoopPredCount = 0;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- ++HeaderLoopPredCount;
- }
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
@@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (L->isLoopInvariant(PtrValue))
continue;
- const SCEV *LSCEV = SE->getSCEV(PtrValue);
- if (!isa<SCEVAddRecExpr>(LSCEV))
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
continue;
+ }
bool FoundBucket = false;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
@@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// returns a value (which might contribute to determining the loop's
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
- !LoopPredecessor->getTerminator()->getType()->isVoidTy())
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
LoopPredecessor = InsertPreheaderForLoop(L, this);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
if (!LoopPredecessor)
return MadeChange;
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
SmallSet<BasicBlock *, 16> BBChanged;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
// The base address of each bucket is transformed into a phi and the others
@@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!BasePtrSCEV->isAffine())
continue;
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
Instruction *MemI = Buckets[i].begin()->second;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
@@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
continue;
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 0965cb3..6df89fe 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
// If the target flags on the operand changes the name of the symbol, do that
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ed88803..f313b0a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
@@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() {
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
+ HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
}
@@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else
CPUName = "generic";
}
-#if (defined(__APPLE__) || defined(__linux__)) && \
- (defined(__ppc__) || defined(__powerpc__))
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b4c1bb1..8d95508 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -101,6 +101,8 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
@@ -115,6 +117,7 @@ protected:
bool HasICBT;
bool HasInvariantFunctionDescriptors;
bool HasPartwordAtomics;
+ bool HasDirectMove;
bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
@@ -225,6 +228,8 @@ public:
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
@@ -239,6 +244,7 @@ public:
return HasInvariantFunctionDescriptors;
}
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 6493713..8aaf5e1 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
- virtual ~PPCTargetStreamer();
+ ~PPCTargetStreamer() override;
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index dfe988f..01233ae 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -622,6 +622,25 @@ void foo() {
__asm__("" ::: "cr2");
}
+//===----------------------------------------------------------------------===//
+Naming convention for instruction formats is very haphazard.
+We have agreed on a naming scheme as follows:
+
+<INST_form>{_<OP_type><OP_len>}+
+
+Where:
+INST_form is the instruction format (X-form, etc.)
+OP_type is the operand type - one of OPC (opcode), RD (register destination),
+ RS (register source),
+ RDp (destination register pair),
+ RSp (source register pair), IM (immediate),
+ XO (extended opcode)
+OP_len is the length of the operand in bits
+
+VSX register operands would be of length 6 (split across two fields),
+condition register fields of length 3.
+We would not need to denote reserved fields in names of instruction formats.
+
//===----------------------------------------------------------------------===//
Instruction fusion was introduced in ISA 2.06 and more opportunities added in
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 43d87d3..1d5b092 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -277,7 +277,7 @@ This will generate the following instruction sequence:
This will almost certainly cause a load-hit-store hazard.
Since val is a value parameter, it should not need to be saved onto
the stack, unless it's being done set up the vector register. Instead,
-it would be better to splat teh value into a vector register, and then
+it would be better to splat the value into a vector register, and then
remove the (dead) stores to the stack.
//===----------------------------------------------------------------------===//