aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2013-03-05 23:27:24 -0800
committerStephen Hines <srhines@google.com>2013-03-05 23:27:24 -0800
commit5adb136be579e8fff3734461580cb34d1d2983b8 (patch)
treebff1a422e9c9789df563aaf9a7e91e63e8ec0384 /lib/Target/PowerPC
parent227a4a4ade38716ba9eb3205f48b52910f3b955e (diff)
parentb3201c5cf1e183d840f7c99ff779d57f1549d8e5 (diff)
downloadexternal_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.zip
external_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.tar.gz
external_llvm-5adb136be579e8fff3734461580cb34d1d2983b8.tar.bz2
Merge commit 'b3201c5cf1e183d840f7c99ff779d57f1549d8e5' into merge_20130226
Conflicts: include/llvm/Support/ELF.h lib/Support/DeltaAlgorithm.cpp Change-Id: I24a4fbce62eb39d924efee3c687b55e1e17b30cd
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp19
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp10
-rw-r--r--lib/Target/PowerPC/PPC.h26
-rw-r--r--lib/Target/PowerPC/PPC.td44
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp48
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp11
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp14
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td62
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp84
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp256
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp158
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td2
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td6
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp15
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp7
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h9
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp83
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h8
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td144
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp7
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h9
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp9
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h3
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp236
27 files changed, 901 insertions, 394 deletions
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 192d18d..6036428 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index d61e741..61868d4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -151,7 +151,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_TOC;
break;
case PPC::fixup_ppc_toc16:
- Type = ELF::R_PPC64_TOC16;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ Type = ELF::R_PPC64_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
+ }
break;
case PPC::fixup_ppc_toc16_ds:
switch (Modifier) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 215aa40..a25d7fe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -17,8 +17,9 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
PCSymbol = ".";
@@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index e6d38eb..f71979f 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,6 +25,7 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
+ class ImmutablePass;
class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
@@ -37,6 +38,9 @@ namespace llvm {
JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates a PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
@@ -53,26 +57,32 @@ namespace llvm {
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 4,
+ MO_PIC_FLAG = 2,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 8,
+ MO_NLP_FLAG = 4,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 16,
+ MO_NLP_HIDDEN_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 0xe0,
+ MO_ACCESS_MASK = 0xf0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 1 << 5,
- MO_HA16 = 2 << 5,
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
- MO_TPREL16_HA = 3 << 5,
- MO_TPREL16_LO = 4 << 5
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index cb15dad..9929136 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E500mc", "">;
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
@@ -58,6 +63,25 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
+// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz
+// FRE p5 through p7 fre (vs. fres, available since p3)
+// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
+// LDBRX p7 load with byte reversal
+// LFIWAX p6, p6x, p7 lfiwax
+// LFIWZX p7 lfiwzx
+// POPCNTB p5 through p7 popcntb and related instructions
+// POPCNTD p7 popcntd and related instructions
+// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
+// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
// Register File Description
@@ -109,10 +133,30 @@ def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureSTFIWX, FeatureISEL,
Feature64Bit
/*, Feature64BitRegs */]>;
+def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureSTFIWX, FeatureISEL,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureQPX]>;
+def : Processor<"pwr3", G5Itineraries,
+ [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr4", G5Itineraries,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr5", G5Itineraries,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr5x", G5Itineraries,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"pwr6x", G5Itineraries,
+ [DirectivePwr6x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index adb673b..eae9b7b 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -464,12 +464,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LDrs);
const MachineOperand &MO = MI->getOperand(1);
- assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!");
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
MCSymbol *MOSymbol = 0;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else {
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
@@ -732,14 +735,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -768,6 +771,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // Pointer-sized reference to _foo (4 bytes on PPC32, 8 on PPC64).
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -802,7 +824,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppcA2",
"ppce500mc",
"ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
"power6",
+ "power6x",
"power7",
"ppc64"
};
@@ -817,8 +844,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -1031,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -1041,7 +1071,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -1060,7 +1090,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 9911575..bd1c378 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -28,10 +28,16 @@ using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -45,6 +51,9 @@ namespace {
char PPCBSel::ID = 0;
}
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
/// Pass
///
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index a74932c..b98cc48 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -54,6 +54,10 @@ using namespace llvm;
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct PPCCTRLoops : public MachineFunctionPass {
@@ -64,7 +68,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- PPCCTRLoops() : MachineFunctionPass(ID) {}
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -174,6 +180,12 @@ namespace {
};
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 3f87e88..caeb179 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
@@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[
//===----------------------------------------------------------------------===//
-// PowerPC Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-/*
-def CC_PPC : CallingConv<[
- // The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
- CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
-
- // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64],
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
-
- // The first 12 Vector arguments are passed in altivec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
-
-/*
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>*/
-]>;
-
-*/
-
-//===----------------------------------------------------------------------===//
-// PowerPC System V Release 4 ABI
+// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
-def CC_PPC_SVR4_Common : CallingConv<[
+def CC_PPC32_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
- CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
- CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
// FP values are passed in F1 - F8.
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
@@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
-def CC_PPC_SVR4_VarArg : CallingConv<[
- CCDelegateTo<CC_PPC_SVR4_Common>
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
-// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
-// vector arguments in vector registers before putting them on the stack.
-def CC_PPC_SVR4 : CallingConv<[
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCDelegateTo<CC_PPC_SVR4_Common>
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// Helper "calling convention" to handle aggregate by value arguments.
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[
// Still, the address of the aggregate copy in the callers stack frame is passed
// in a GPR (or in the parameter list area if all GPRs are allocated) from the
// caller to the callee. The location for the address argument is assigned by
-// the CC_PPC_SVR4 calling convention.
+// the CC_PPC32_SVR4 calling convention.
//
-// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
-def CC_PPC_SVR4_ByVal : CallingConv<[
+def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 5901f36..0a396e6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -119,12 +119,21 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = getPPCRegisterNumbering(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
}
// If no registers are used, turn this into a copy.
@@ -198,13 +207,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
- // still generate stackless code if all local vars are reg-allocated.
- // Try: (FrameSize <= 224
- // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -777,7 +787,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -802,6 +813,16 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
+
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
// a large stack, which will require scavenging a register to materialize a
// large offset.
@@ -1115,6 +1136,47 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
.addReg(MoveReg));
}
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 3517d8c..d09e47f 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -27,7 +27,8 @@ class PPCFrameLowering: public TargetFrameLowering {
public:
PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
Subtarget(sti) {
}
@@ -50,6 +51,10 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 762b346..17bea8a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -23,9 +23,9 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -34,6 +34,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
@@ -48,7 +52,9 @@ namespace {
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {}
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -61,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -1273,16 +1281,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::TOC_ENTRY: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
- // For medium code model, we generate two instructions as described
- // below. Otherwise we allow SelectCodeCommon to handle this, selecting
- // one of LDtoc, LDtocJTI, and LDtocCPT.
- if (TM.getCodeModel() != CodeModel::Medium)
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
// The first source operand is a TargetGlobalAddress or a
// TargetJumpTable. If it is an externally defined symbol, a symbol
// with common linkage, a function address, or a jump table address,
- // we generate:
+ // or if we are generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1291,7 +1300,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
- if (isa<JumpTableSDNode>(GA))
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
@@ -1316,11 +1325,231 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
}
return SelectCode(N);
}
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load or store instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A memory operation, though,
+ // can receive relocation from various flavors of ADDIxxx, so we need
+ // to carry the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI8L:
+ case PPC::ADDIL:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
/// createPPCISelDag - This pass converts a legalized DAG into a
@@ -1330,3 +1559,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9966b2c..cf1f459 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -36,20 +36,20 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
@@ -498,15 +500,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
}
}
@@ -592,6 +594,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
@@ -1746,18 +1749,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1780,11 +1783,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1907,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1968,7 +1971,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -2160,13 +2163,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -2501,6 +2507,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
@@ -3323,7 +3332,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
@@ -3339,17 +3348,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
@@ -3493,11 +3491,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
@@ -3510,7 +3508,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
@@ -3521,7 +3519,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -4415,14 +4413,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -4447,12 +4439,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -5028,11 +5025,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5128,23 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 12b3df7..f5d418c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -237,6 +237,12 @@ namespace llvm {
/// sym@got@dtprel@l.
ADDI_DTPREL_L,
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -252,13 +258,14 @@ namespace llvm {
/// or i32.
LBRX,
- /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces
- /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha.
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
ADDIS_TOC_HA,
- /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a
- /// LD instruction with base register G8RReg and offset sym@toc@l.
- /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
LD_TOC_L,
/// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 1dd5415..0120130 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -701,7 +701,7 @@ def : Pat<(PPCload ixaddr:$src),
def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
-// Support for medium code model.
+// Support for medium and large code model.
def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
"#ADDIStocHA",
[(set G8RC:$rD,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 8c077b7..460e943 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -181,7 +181,7 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
-// Instructions to support medium code model
+// Instructions to support medium and large code model
def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
@@ -346,7 +346,7 @@ def crbitm: Operand<i8> {
// Address operands
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
@@ -355,7 +355,7 @@ def memrr : Operand<iPTR> {
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 851de17..cfcd749 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -115,7 +115,7 @@ asm(
"lwz r2, 208(r1)\n" // stub's frame
"lwz r4, 8(r2)\n" // stub's lr
"li r5, 0\n" // 0 == 32 bit
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
"mtctr r3\n"
// Restore all int arg registers
"lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
@@ -178,7 +178,7 @@ asm(
"lwz 5, 104(1)\n" // stub's frame
"lwz 4, 4(5)\n" // stub's lr
"li 5, 0\n" // 0 == 32 bit
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"mtctr 3\n"
// Restore all int arg registers
"lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
@@ -259,10 +259,10 @@ asm(
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
#ifdef __ELF__
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"nop\n"
#else
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
#endif
"mtctr 3\n"
// Restore all int arg registers
@@ -292,9 +292,10 @@ void PPC64CompilationCallback() {
#endif
extern "C" {
-static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 73f7a2c..9b0df3e 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 24caffa..045b375 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -71,6 +71,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR;
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -83,7 +86,8 @@ public:
VarArgsFrameIndex(0),
VarArgsStackOffset(0),
VarArgsNumGPR(0),
- VarArgsNumFPR(0) {}
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -125,6 +129,9 @@ public:
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 378c147..df245cc 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -111,11 +111,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
- // For 32-bit SVR4, also initialize the frame index associated with
- // the CR spill slot.
- if (!Subtarget.isPPC64())
- CRSpillFrameIdx = 0;
-
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@@ -222,45 +217,6 @@ PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-void PPCRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- I->getOpcode() == PPC::ADJCALLSTACKUP) {
- // Add (actually subtract) back the amount the callee popped on return.
- if (int CalleeAmt = I->getOperand(1).getImm()) {
- bool is64Bit = Subtarget.isPPC64();
- CalleeAmt *= -1;
- unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
- unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
-
- if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addImm(CalleeAmt);
- } else {
- MachineBasicBlock::iterator MBBI = I;
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CalleeAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addReg(TmpReg);
- }
- }
- }
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
/// register first and then a spilled callee-saved register if that fails.
static
@@ -489,19 +445,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we must
- // create exactly one stack slot and return its FrameIdx for all
- // nonvolatiles.
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64())
FrameIdx = 0;
- } else if (CRSpillFrameIdx) {
- FrameIdx = CRSpillFrameIdx;
- } else {
- MachineFrameInfo *MFI =
- (const_cast<MachineFunction &>(MF)).getFrameInfo();
- FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
- CRSpillFrameIdx = FrameIdx;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
}
return true;
}
@@ -510,7 +461,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -524,20 +476,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Find out which operand is the frame index.
- unsigned FIOperandNo = 0;
- while (!MI.getOperand(FIOperandNo).isFI()) {
- ++FIOperandNo;
- assert(FIOperandNo != MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
// Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNo-1;
+ OffsetOperandNo = FIOperandNum-1;
// Get the frame index.
- int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Get the frame pointer save index. Users of this index are primarily
// DYNALLOC instructions.
@@ -567,7 +512,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
(is64Bit ? PPC::X31 : PPC::R31) :
(is64Bit ? PPC::X1 : PPC::R1),
false);
@@ -649,7 +594,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index a8fd796..9840666 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -30,7 +30,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
std::map<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
- mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -56,10 +55,6 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void lowerDynamicAlloc(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
@@ -69,7 +64,8 @@ public:
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 5ca3876..8ee9b1e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -63,142 +63,28 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
field bits<5> Num = num;
}
-
// General-purpose registers
-def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
-def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
-def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
-def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
-def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
-def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
-def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
-def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
-def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
-def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
-def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
-def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
-def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
-def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
-def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
-def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
-def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
-def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
-def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
-def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
-def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
-def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
-def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
-def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
-def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
-def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
-def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
-def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
-def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
-def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
-def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
-def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
// 64-bit General-purpose registers
-def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
-def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
-def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
-def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
-def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
-def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
-def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
-def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
-def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
-def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
-def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
-def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
-def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
-def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
-def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
-def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
-def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
-def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
-def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
-def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
-def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
-def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
-def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
-def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
-def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
-def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
-def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
-def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
-def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
-def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
-def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
-def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
// Floating-point registers
-def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
-def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
-def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
-def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
-def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
-def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
-def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
-def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
-def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
-def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
-def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
-def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
-def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
-def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
-def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
-def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
-def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
-def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
-def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
-def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
-def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
-def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
-def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
-def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
-def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
-def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
-def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
-def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
-def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
-def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
-def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
-def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
// Vector registers
-def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
-def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
-def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
-def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
-def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
-def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
-def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
-def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
-def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
-def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
-def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
-def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
-def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
-def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
-def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
-def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
-def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
-def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
-def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
-def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
-def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
-def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
-def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
-def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
-def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
-def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
-def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
-def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
-def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
-def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
-def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
-def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index d9b4e30..18e4c07 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -36,6 +36,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
+ , HasQPX(false)
, HasFSQRT(false)
, HasSTFIWX(false)
, HasISEL(false)
@@ -82,6 +83,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 416c0f3..15885bd 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
DIR_PWR6,
+ DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,6 +75,7 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+ bool HasQPX;
bool HasFSQRT;
bool HasSTFIWX;
bool HasISEL;
@@ -150,6 +156,7 @@ public:
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
@@ -160,6 +167,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b8b7882..fe851c1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -126,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d917d99..606ccb3 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -68,6 +68,9 @@ public:
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 0000000..5e9ad34
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,236 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // FIXME: PPC currently does not have custom popcnt lowering even though
+ // there is hardware support. Once this is fixed, update this function
+ // to reflect the real capabilities of the hardware.
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fall back to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element inserts/extracts with Altivec are very expensive,
+ // because they require a store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
+