aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/LLVMBuild.txt2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp1
-rw-r--r--lib/Target/NVPTX/NVPTX.h5
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h3
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp350
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h39
-rw-r--r--lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h10
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp61
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp124
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h5
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp134
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h12
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h2
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td72
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h3
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h8
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp40
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h20
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp41
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h13
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp88
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h74
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp13
-rw-r--r--lib/Target/NVPTX/NVPTXVector.td4
31 files changed, 480 insertions, 678 deletions
diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index bc8d82e..6ea244a 100644
--- a/lib/Target/NVPTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = NVPTXCodeGen
parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 4fd5bdd..11d737e 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -50,5 +50,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
AscizDirective = " .b8";
// @TODO: Can we just disable this?
+ WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
}
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 13ba57e..382525d 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -59,9 +59,8 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
-ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
-FunctionPass *
-createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 69fc86e..c343980 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
@@ -32,8 +33,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
- AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
}
const char *getPassName() const override {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 35ba4f1..833db04 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,8 +17,8 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
-#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
+#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
@@ -27,6 +27,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugInfo.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
using namespace llvm;
@@ -108,160 +110,6 @@ void VisitGlobalVariableForEmission(
}
}
-// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
-// cannot just link to the existing version.
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-using namespace nvptx;
-const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
- MCContext &Ctx = AP.OutContext;
-
- if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
-
- if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (!CE)
- llvm_unreachable("Unknown constant value to lower!");
-
- switch (CE->getOpcode()) {
- default:
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(
- CE, AP.TM.getSubtargetImpl()->getDataLayout()))
- if (C != CE)
- return LowerConstant(C, AP);
-
- // Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::AddrSpaceCast: {
- // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
- // handled by the generic() logic in the MCExpr printer
- PointerType *DstTy = cast<PointerType>(CE->getType());
- PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
- if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
- return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::GetElementPtr: {
- const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
- // Generate a symbolic expression for the byte address
- APInt OffsetAI(TD.getPointerSizeInBits(), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
-
- const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (!OffsetAI)
- return Base;
-
- int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
- Ctx);
- }
-
- case Instruction::Trunc:
- // We emit the value and depend on the assembler to truncate the generated
- // expression properly. This is important for differences between
- // blockaddress labels. Since the two labels are in the same function, it
- // is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
- case Instruction::BitCast:
- return LowerConstant(CE->getOperand(0), AP);
-
- case Instruction::IntToPtr: {
- const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false /*ZExt*/);
- return LowerConstant(Op, AP);
- }
-
- case Instruction::PtrToInt: {
- const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- const MCExpr *OpExpr = LowerConstant(Op, AP);
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot equal to the size of the pointer.
- if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
- return OpExpr;
-
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr =
- MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
- }
-
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
- const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
- switch (CE->getOpcode()) {
- default:
- llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add:
- return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub:
- return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul:
- return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv:
- return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem:
- return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl:
- return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And:
- return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or:
- return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
- case Instruction::Xor:
- return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
- }
- }
- }
-}
-
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
@@ -316,7 +164,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
emitLineNumberAsDotLoc(*MI);
MCInst Inst;
@@ -389,8 +237,6 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
- const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
-
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
@@ -403,7 +249,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!ST.hasImageHandles()) {
+ if (!nvptxSubtarget->hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -500,12 +346,12 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const DataLayout *TD = TM.getDataLayout();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -528,17 +374,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
} else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
<< " func_retval0";
- } else {
- if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- unsigned totalsz = TD->getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
- } else
- assert(false && "Unknown return type");
- }
+ } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+ unsigned totalsz = TD->getTypeAllocSize(Ty);
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
+ } else
+ llvm_unreachable("Unknown return type");
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
@@ -574,6 +418,42 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
printReturnValStr(F, O);
}
+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+ const MachineBasicBlock &MBB) const {
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ // TODO: isLoopHeader() should take "const MachineBasicBlock *".
+ // We insert .pragma "nounroll" only to the loop header.
+ if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+ return false;
+
+ // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+ // we iterate through each back edge of the loop with header MBB, and check
+ // whether its metadata contains llvm.loop.unroll.disable.
+ for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+ const MachineBasicBlock *PMBB = *I;
+ if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+ // Edges from other loops to MBB are not back edges.
+ continue;
+ }
+ if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+ if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
+ if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ AsmPrinter::EmitBasicBlockStart(MBB);
+ if (isLoopHeaderOfNoUnroll(MBB))
+ OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -624,14 +504,13 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo();
if (TRI->isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(
- Twine("implicit-def: ") +
- TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ nvptxSubtarget->getRegisterInfo()->getName(RegNo));
}
OutStreamer.AddBlankLine();
}
@@ -793,11 +672,6 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
return false;
}
- if (const MDNode *md = dyn_cast<MDNode>(U))
- if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
- return true;
-
for (const User *UU : U->users())
if (usedInOneFunc(UU, oneFunc) == false)
return false;
@@ -938,6 +812,14 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
bool NVPTXAsmPrinter::doInitialization(Module &M) {
+ // Construct a default subtarget off of the TargetMachine defaults. The
+ // rest of NVPTX isn't friendly to change subtargets per function and
+ // so the default TargetMachine will have all of the options.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget STI(TT, CPU, FS, NTM);
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -952,10 +834,10 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+ Mang = new Mangler(TM.getDataLayout());
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1);
+ emitHeader(M, OS1, STI);
OutStreamer.EmitRawText(OS1.str());
// Already commented out
@@ -971,7 +853,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
OutStreamer.AddBlankLine();
}
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ // If we're not NVCL we're CUDA, go ahead and emit filenames.
+ if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
recordAndEmitFilenames(M);
GlobalsEmitted = false;
@@ -1012,22 +895,24 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OutStreamer.EmitRawText(OS2.str());
}
-void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
+ const NVPTXSubtarget &STI) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
O << "\n";
- unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+ unsigned PTXVersion = STI.getPTXVersion();
O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
O << ".target ";
- O << nvptxSubtarget.getTargetName();
+ O << STI.getTargetName();
- if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
- if (!nvptxSubtarget.hasDouble())
+ else {
+ if (!STI.hasDouble())
O << ", map_f64_to_f32";
}
@@ -1037,7 +922,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "\n";
O << ".address_size ";
- if (nvptxSubtarget.is64Bit())
+ if (NTM.is64Bit())
O << "64";
else
O << "32";
@@ -1047,7 +932,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
-
// If we did not emit any functions, then the global declarations have not
// yet been emitted.
if (!GlobalsEmitted) {
@@ -1109,7 +993,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
raw_ostream &O) {
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
const GlobalVariable *GVar = cast<GlobalVariable>(V);
@@ -1153,7 +1037,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1287,7 +1171,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
// Special case: ABI requires that we use .u8 for predicates
if (ETy->isIntegerTy(1))
@@ -1341,7 +1225,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
O << " .u64 " << *getSymbol(GVar) << "[";
O << ElementSize / 8;
} else {
@@ -1439,7 +1323,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
case Type::DoubleTyID:
return "f64";
case Type::PointerTyID:
- if (nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
if (useB4PTR)
return "b64";
else
@@ -1456,7 +1340,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1469,7 +1353,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
@@ -1508,17 +1392,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (ATy)
return getOpenCLAlignment(TD, ATy->getElementType());
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (VTy) {
- Type *ETy = VTy->getElementType();
- unsigned int numE = VTy->getNumElements();
- unsigned int alignE = TD->getPrefTypeAlignment(ETy);
- if (numE == 3)
- return 4 * alignE;
- else
- return numE * alignE;
- }
-
const StructType *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
@@ -1541,50 +1414,22 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
- else {
- std::string argName = I->getName();
- const char *p = argName.c_str();
- while (*p) {
- if (*p == '.')
- O << "_";
- else
- O << *p;
- p++;
- }
- }
+ O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- Function::const_arg_iterator I, E;
- int i = 0;
-
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
- O << *CurrentFnSym << "_param_" << paramIndex;
- return;
- }
-
- for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i == paramIndex) {
- printParamName(I, paramIndex, O);
- return;
- }
- }
- llvm_unreachable("paramIndex out of bound");
+ O << *CurrentFnSym << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
MVT thePointerTy = TLI->getPointerTy();
O << "(\n";
@@ -1603,21 +1448,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
@@ -1650,7 +1495,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Special handling for pointer arguments to kernel
O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
- if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
+ NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
@@ -1779,7 +1625,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
<< getFunctionNumber() << "[" << NumBytes << "];\n";
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
} else {
@@ -1900,7 +1746,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
}
return;
} else {
- O << *LowerConstant(CPV, *this);
+ O << *lowerConstant(CPV);
return;
}
}
@@ -1910,7 +1756,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -2034,7 +1880,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
int Bytes;
// Old constants
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 83fa5d3..7e6b5e8 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -39,13 +39,6 @@
// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
// (subclass of MCStreamer).
-// This is defined in AsmPrinter.cpp.
-// Used to process the constant expressions in initializers.
-namespace nvptx {
-const llvm::MCExpr *
-LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
-}
-
namespace llvm {
class LineReader {
@@ -145,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nSym = 0;
unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
unsigned int nBytes = 4;
- if (AP.nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
nBytes = 8;
for (pos = 0; pos < size; pos += nBytes) {
if (pos)
@@ -167,7 +160,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
O << *Name;
}
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
- O << *nvptx::LowerConstant(Cexpr, AP);
+ O << *AP.lowerConstant(Cexpr);
} else
llvm_unreachable("symbol type unknown");
nSym++;
@@ -194,6 +187,7 @@ private:
const Function *F;
std::string CurrentFnName;
+ void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
@@ -218,7 +212,7 @@ private:
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
- void emitHeader(Module &M, raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionExternParamList(const MachineFunction &MF);
@@ -254,8 +248,10 @@ private:
typedef DenseMap<unsigned, unsigned> VRegMap;
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // cache the subtarget here.
- const NVPTXSubtarget &nvptxSubtarget;
+
+ // Cache the subtarget here.
+ const NVPTXSubtarget *nvptxSubtarget;
+
// Build the map between type name and ID based on module's type
// symbol table.
std::map<const Type *, std::string> TypeNameMap;
@@ -288,6 +284,8 @@ private:
MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
+ bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
LineReader *reader;
LineReader *getReader(std::string);
@@ -305,12 +303,12 @@ private:
bool EmitGeneric;
public:
- NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)),
+ EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
CurrentBankselLabelInBasicBlock = "";
reader = nullptr;
- EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
}
~NVPTXAsmPrinter() {
@@ -318,6 +316,15 @@ public:
delete reader;
}
+ bool runOnMachineFunction(MachineFunction &F) override {
+ nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
+ return AsmPrinter::runOnMachineFunction(F);
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
bool ignoreLoc(const MachineInstr &);
std::string getVirtualRegisterName(unsigned) const;
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 962b123..7d4be8e 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -19,8 +19,8 @@
#include "NVPTX.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 314df38..34d3a66 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,9 +26,8 @@
using namespace llvm;
-NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- is64bit(STI.is64Bit()) {}
+NVPTXFrameLowering::NVPTXFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
@@ -45,7 +44,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
- if (is64bit) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0846b78..d1e0a5c 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -19,18 +19,16 @@
namespace llvm {
class NVPTXSubtarget;
class NVPTXFrameLowering : public TargetFrameLowering {
- bool is64bit;
-
public:
- explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
+ explicit NVPTXFrameLowering();
bool hasFP(const MachineFunction &MF) const override;
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 58fa95b..86d134b 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -22,10 +22,11 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
-#include "llvm/PassManager.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -54,8 +55,7 @@ private:
IRBuilder<> &Builder);
Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
IRBuilder<> &Builder);
- void remapNamedMDNode(Module *M, NamedMDNode *N);
- MDNode *remapMDNode(Module *M, MDNode *N);
+ void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N);
typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
@@ -125,12 +125,17 @@ bool GenericToNVVM::runOnModule(Module &M) {
ConstantToValueMap.clear();
}
+ // Copy GVMap over to a standard value map.
+ ValueToValueMapTy VM;
+ for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
+ VM[I->first] = I->second;
+
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
for (Module::named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; I++) {
- remapNamedMDNode(&M, I);
+ remapNamedMDNode(VM, I);
}
// Walk through the global variable initializers, and replace any use of
@@ -362,7 +367,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
}
}
-void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) {
bool OperandChanged = false;
SmallVector<MDNode *, 16> NewOperands;
@@ -372,7 +377,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
// converted to another value.
for (unsigned i = 0; i < NumOperands; ++i) {
MDNode *Operand = N->getOperand(i);
- MDNode *NewOperand = remapMDNode(M, Operand);
+ MDNode *NewOperand = MapMetadata(Operand, VM);
OperandChanged |= Operand != NewOperand;
NewOperands.push_back(NewOperand);
}
@@ -390,47 +395,3 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
N->addOperand(*I);
}
}
-
-MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
-
- bool OperandChanged = false;
- SmallVector<Value *, 8> NewOperands;
- unsigned NumOperands = N->getNumOperands();
-
- // Check if any operand is or contains a global variable in GVMap, and thus
- // converted to another value.
- for (unsigned i = 0; i < NumOperands; ++i) {
- Value *Operand = N->getOperand(i);
- Value *NewOperand = Operand;
- if (Operand) {
- if (isa<GlobalVariable>(Operand)) {
- GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
- if (I != GVMap.end()) {
- NewOperand = I->second;
- if (++i < NumOperands) {
- NewOperands.push_back(NewOperand);
- // Address space of the global variable follows the global variable
- // in the global variable debug info (see createGlobalVariable in
- // lib/Analysis/DIBuilder.cpp).
- NewOperand =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- I->second->getType()->getAddressSpace());
- }
- }
- } else if (isa<MDNode>(Operand)) {
- NewOperand = remapMDNode(M, cast<MDNode>(Operand));
- }
- }
- OperandChanged |= Operand != NewOperand;
- NewOperands.push_back(NewOperand);
- }
-
- // If none of the operands has been modified, return N as it is.
- if (!OperandChanged) {
- return N;
- }
-
- // If any of the operands has been modified, create a new MDNode with the new
- // operands.
- return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
-}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd0422d..e01c780 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -50,11 +50,15 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {
doMulWide = (OptLevel > 0);
}
+bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
int NVPTXDAGToDAGISel::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -89,16 +93,14 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
const Function *F = MF->getFunction();
// Otherwise, check for an nvptx-f32ftz attribute on the function
if (F->hasFnAttribute("nvptx-f32ftz"))
- return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
- "nvptx-f32ftz")
- .getValueAsString() == "true");
+ return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
else
return false;
}
}
bool NVPTXDAGToDAGISel::allowFMA() const {
- const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
+ const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
return TL->allowFMA(*MF, OptLevel);
}
@@ -525,8 +527,7 @@ SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
}
}
-static unsigned int getCodeAddrSpace(MemSDNode *N,
- const NVPTXSubtarget &Subtarget) {
+static unsigned int getCodeAddrSpace(MemSDNode *N) {
const Value *Src = N->getMemOperand()->getValue();
if (!Src)
@@ -579,20 +580,16 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (SrcAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
- : NVPTX::cvta_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
- : NVPTX::cvta_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
- : NVPTX::cvta_const_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
- : NVPTX::cvta_local_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -604,20 +601,20 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (DstAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
- : NVPTX::cvta_to_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
+ : NVPTX::cvta_to_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
- : NVPTX::cvta_to_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+ : NVPTX::cvta_to_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
- : NVPTX::cvta_to_const_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
- : NVPTX::cvta_to_local_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -638,7 +635,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD);
// Volatile Setting
// - .volatile is only availalble for .global and .shared
@@ -713,9 +710,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
- : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_asi;
@@ -742,10 +738,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_ari_64;
@@ -797,7 +792,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
getI32Imm(fromTypeWidth), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_areg_64;
@@ -874,7 +869,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
// Volatile Setting
// - .volatile is only availalble for .global and .shared
@@ -974,9 +969,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1028,10 +1022,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1133,7 +1126,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1425,10 +1418,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1710,7 +1702,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2013,7 +2005,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST);
// Volatile Setting
// - .volatile is only availalble for .global and .shared
@@ -2083,9 +2075,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), Addr, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_asi;
@@ -2112,10 +2103,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), Base, Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_ari_64;
@@ -2167,7 +2157,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
getI32Imm(toTypeWidth), Base, Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_areg_64;
@@ -2241,7 +2231,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
EVT StoreVT = MemSD->getMemoryVT();
// Address Space Setting
- unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
report_fatal_error("Cannot store to pointer that points to constant "
@@ -2344,9 +2334,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
break;
}
StOps.push_back(Addr);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2395,10 +2384,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
}
StOps.push_back(Base);
StOps.push_back(Offset);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2496,7 +2484,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
StOps.push_back(Base);
StOps.push_back(Offset);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -4772,7 +4760,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
// How many bits are in our mask?
- uint64_t NumBits = CountTrailingOnes_64(MaskVal);
+ uint64_t NumBits = countTrailingOnes(MaskVal);
Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
@@ -4836,10 +4824,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
NumZeros = 0;
// The number of bits in the result bitfield will be the number of
// trailing ones (the AND) minus the number of bits we shift off
- NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+ NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
} else if (isShiftedMask_64(MaskVal)) {
NumZeros = countTrailingZeros(MaskVal);
- unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+ unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
// The number of bits in the result bitfield will be the number of
// trailing zeros plus the number of set bits in the mask minus the
// number of bits we shift off
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 69afcd7..ca432b5 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -26,6 +26,7 @@ using namespace llvm;
namespace {
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+ const NVPTXTargetMachine &TM;
// If true, generate mul.wide from sext and mul
bool doMulWide;
@@ -43,8 +44,8 @@ public:
const char *getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
-
- const NVPTXSubtarget &Subtarget;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 0b0b536..1dc81f7 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -106,9 +106,9 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
- : TargetLowering(TM), nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI)
+ : TargetLowering(TM), nvTM(&TM), STI(STI) {
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
@@ -167,14 +167,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
- if (nvptxSubtarget.hasROT64()) {
+ if (STI.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
} else {
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
- if (nvptxSubtarget.hasROT32()) {
+ if (STI.hasROT32()) {
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTR, MVT::i32, Legal);
} else {
@@ -203,8 +203,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -214,12 +215,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
@@ -232,9 +232,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::ADDE, MVT::i64, Expand);
// Register custom handling for vector loads/stores
- for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT VT = (MVT::SimpleValueType) i;
+ for (MVT VT : MVT::vector_valuetypes()) {
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -261,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // PTX does not directly support SELP of i1, so promote to i32 first
+ setOperationAction(ISD::SELECT, MVT::i1, Custom);
+
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AND);
@@ -270,7 +271,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
// Now deduce the information based on the above mentioned
// actions
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -878,7 +879,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
unsigned retAlignment,
const ImmutableCallSite *CS) const {
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return "";
@@ -905,16 +906,14 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ } else if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
- if((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
- } else {
- assert(false && "Unknown return type");
- }
+ llvm_unreachable("Unknown return type");
}
O << ") ";
}
@@ -1045,7 +1044,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *retTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -1456,8 +1455,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
- F->getContext(), ObjectVT) == NumElts &&
+ assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
+ ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
bool needTruncate = sz < 8 ? true : false;
@@ -1475,11 +1474,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(EltVT);
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1505,11 +1501,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParamV2, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1551,11 +1544,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(Ofst, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
Opc, dl, DAG.getVTList(LoadRetVTs),
LoadRetOps, EltVT, MachinePointerInfo());
@@ -1609,11 +1599,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(Offsets[i], MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps,
@@ -1679,7 +1666,7 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShAmt = Op.getOperand(2);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} >> Amt
@@ -1739,7 +1726,7 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} << Amt
@@ -1807,11 +1794,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
+ case ISD::SELECT:
+ return LowerSelect(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Op0 = Op->getOperand(0);
+ SDValue Op1 = Op->getOperand(1);
+ SDValue Op2 = Op->getOperand(2);
+ SDLoc DL(Op.getNode());
+
+ assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+ return Trunc;
+}
+
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
@@ -2033,13 +2038,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
+ const TargetLowering *TLI = STI.getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
bool isKernel = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -2337,7 +2342,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Type *RetTy = F->getReturnType();
const DataLayout *TD = getDataLayout();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -3757,7 +3762,8 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
}
std::pair<unsigned, const TargetRegisterClass *>
-NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3778,7 +3784,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -4200,7 +4206,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD:
case ISD::FADD:
- return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
+ return PerformADDCombine(N, DCI, STI, OptLevel);
case ISD::MUL:
return PerformMULCombine(N, DCI, OptLevel);
case ISD::SHL:
@@ -4285,11 +4291,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
}
}
- SmallVector<SDValue, 8> OtherOps;
-
// Copy regular operands
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
@@ -4402,8 +4405,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
OtherOps.push_back(Chain); // Chain
// Skip operand 1 (intrinsic ID)
// Others
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ OtherOps.append(N->op_begin() + 2, N->op_end());
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
@@ -4434,9 +4436,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
"Custom handling of non-i8 ldu/ldg?");
// Just copy all operands as-is
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
+ SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
// Force output to i16
SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index d66d81a..1b4da2c 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -436,7 +436,8 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -469,7 +470,8 @@ public:
ConstraintType
getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
SDValue LowerFormalArguments(
@@ -507,8 +509,10 @@ public:
bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+ bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
+
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
EVT = MVT::i32) const;
@@ -527,6 +531,8 @@ private:
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a98fb37..aa36b6b 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -16,11 +16,11 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b5b4fbe..dabc3be 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,11 +14,11 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -28,9 +28,7 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
-// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
- : NVPTXGenInstrInfo(), RegInfo(STI) {}
+NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 6de7536..9b5d491 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -27,7 +27,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
const NVPTXRegisterInfo RegInfo;
virtual void anchor();
public:
- explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
+ explicit NVPTXInstrInfo();
const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 9900b8c..68f0d9f 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -117,24 +117,24 @@ def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
//===----------------------------------------------------------------------===//
-def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
-def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
-def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
def useAtomRedG32forGen32 :
- Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
-def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
-def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
-def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
-def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+ Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
- Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
-def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
-def hasVote : Predicate<"Subtarget.hasVote()">;
-def hasDouble : Predicate<"Subtarget.hasDouble()">;
-def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
-def hasLDG : Predicate<"Subtarget.hasLDG()">;
-def hasLDU : Predicate<"Subtarget.hasLDU()">;
-def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+ Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget->hasVote()">;
+def hasDouble : Predicate<"Subtarget->hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
+def hasLDG : Predicate<"Subtarget->hasLDG()">;
+def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -150,12 +150,12 @@ def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
-def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
-def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
+def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def true : Predicate<"1">;
-def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
//===----------------------------------------------------------------------===//
@@ -296,7 +296,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// General Type Conversion
//-----------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Generate a cvt to the given type from all possible types.
// Each instance takes a CvtMode immediate that defines the conversion mode to
// use. It can be CvtNONE to omit a conversion mode.
@@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
-// Special select for predicate operands
-def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
- (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
- (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-
//
// Funnnel shift in clamp mode
//
@@ -1659,12 +1654,12 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
-defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
+defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
+defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
+defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
+defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
+defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
+defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
@@ -1673,6 +1668,13 @@ defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
+defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
+defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
+defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
+defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
+defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
+defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
+
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
@@ -2094,7 +2096,7 @@ multiclass LD<NVPTXRegClass regclass> {
"$fromWidth \t$dst, [$addr+$offset];"), []>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LD_i8 : LD<Int16Regs>;
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
@@ -2136,7 +2138,7 @@ multiclass ST<NVPTXRegClass regclass> {
" \t[$addr+$offset], $src;"), []>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm ST_i8 : ST<Int16Regs>;
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
@@ -2220,7 +2222,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
@@ -2303,7 +2305,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm STV_i8 : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 8759406..da301d5 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
@@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
- AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
}
bool runOnFunction(Function &F) override;
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a1e1b9e..c1c67e3 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -48,9 +48,9 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *STI.getFrameLowering();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 358ccce..5ca96e4 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -71,8 +71,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
}
-NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
+NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index d2e6733..75b8f15 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -22,19 +22,13 @@
#include "NVPTXGenRegisterInfo.inc"
namespace llvm {
-
-// Forward Declarations.
-class TargetInstrInfo;
-class NVPTXSubtarget;
-
class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
- bool Is64Bit;
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const NVPTXSubtarget &st);
+ NVPTXRegisterInfo();
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 324420d..e83f735 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,11 +16,12 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
using namespace llvm;
@@ -142,8 +143,9 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
- const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
- if (ST.getDrvInterface() == NVPTX::CUDA) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ if (TM.getDrvInterface() == NVPTX::CUDA) {
// For CUDA, we preserve the param loads coming from function arguments
return false;
}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 3d52532..069d6e1 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
using namespace llvm;
@@ -25,17 +26,6 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
-static std::string computeDataLayout(bool is64Bit) {
- std::string Ret = "e";
-
- if (!is64Bit)
- Ret += "-p:32:32";
-
- Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
- return Ret;
-}
-
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
@@ -54,18 +44,18 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool is64Bit)
- : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
- SmVersion(20), DL(computeDataLayout(is64Bit)),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
- FrameLowering(*this) {
-
- Triple T(TT);
-
- if (T.getOS() == Triple::NVCL)
- drvInterface = NVPTX::NVCL;
- else
- drvInterface = NVPTX::CUDA;
+ const std::string &FS,
+ const NVPTXTargetMachine &TM)
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
+ InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
+ TSInfo(TM.getDataLayout()), FrameLowering() {}
+
+bool NVPTXSubtarget::hasImageHandles() const {
+ // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+ // textures
+ if (TM.getDrvInterface() == NVPTX::CUDA)
+ return (SmVersion >= 30);
+
+ // Disabled, otherwise
+ return false;
}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index fb2d404..e9833e5 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -32,8 +32,6 @@ namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
virtual void anchor();
std::string TargetName;
- NVPTX::DrvInterface drvInterface;
- bool Is64Bit;
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;
@@ -41,7 +39,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned int SmVersion;
- const DataLayout DL; // Calculates type size & alignment
+ const NVPTXTargetMachine &TM;
NVPTXInstrInfo InstrInfo;
NVPTXTargetLowering TLInfo;
TargetSelectionDAGInfo TSInfo;
@@ -55,13 +53,12 @@ public:
/// of the specified module.
///
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM, bool is64Bit);
+ const std::string &FS, const NVPTXTargetMachine &TM);
const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -95,20 +92,9 @@ public:
}
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
- bool hasImageHandles() const {
- // Enable handles for Kepler+, where CUDA supports indirect surfaces and
- // textures
- if (getDrvInterface() == NVPTX::CUDA)
- return (SmVersion >= 30);
-
- // Disabled, otherwise
- return false;
- }
- bool is64Bit() const { return Is64Bit; }
+ bool hasImageHandles() const;
unsigned int getSmVersion() const { return SmVersion; }
- NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
std::string getTargetName() const { return TargetName; }
unsigned getPTXVersion() const { return PTXVersion; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index d87693f..1a267a6 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
+#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -24,12 +25,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
@@ -69,14 +70,29 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
+static std::string computeDataLayout(bool is64Bit) {
+ std::string Ret = "e";
+
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+ return Ret;
+}
+
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
TLOF(make_unique<NVPTXTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
+ if (Triple(TT).getOS() == Triple::NVCL)
+ drvInterface = NVPTX::NVCL;
+ else
+ drvInterface = NVPTX::CUDA;
initAsmInfo();
}
@@ -110,8 +126,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
+ void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
@@ -125,12 +140,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
-void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
- // allows the NVPTX pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createNVPTXTargetTransformInfoPass(this));
+TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
}
void NVPTXPassConfig::addIRPasses() {
@@ -149,6 +161,7 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+ addPass(createStraightLineStrengthReducePass());
addPass(createSeparateConstOffsetFromGEPPass());
// The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
// by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
@@ -183,10 +196,8 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() { return false; }
-bool NVPTXPassConfig::addPostRegAlloc() {
- addPass(createNVPTXPrologEpilogPass());
- return false;
+void NVPTXPassConfig::addPostRegAlloc() {
+ addPass(createNVPTXPrologEpilogPass(), false);
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a726bd1..a81abfe 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
-#include "NVPTXSubtarget.h"
#include "ManagedStringPool.h"
+#include "NVPTXSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -25,7 +25,10 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -37,9 +40,10 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
-
+ const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
+ bool is64Bit() const { return is64bit; }
+ NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
@@ -55,8 +59,7 @@ public:
return TLOF.get();
}
- /// \brief Register NVPTX analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b09d0d4..b8af04d 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,19 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// \file
-// This file implements a TargetTransformInfo analysis pass specific to the
-// NVPTX target machine. It uses the target's detailed information to provide
-// more precise answers to certain TTI queries, while letting the target
-// independent and default TTI implementations handle the rest.
-//
-//===----------------------------------------------------------------------===//
-#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,69 +19,10 @@ using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeNVPTXTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
- const NVPTXTargetLowering *TLI;
-public:
- NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- NVPTXTTI(const NVPTXTargetMachine *TM)
- : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- bool hasBranchDivergence() const override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
- "NVPTX Target Transform Info", true, true, false)
-char NVPTXTTI::ID = 0;
-
-ImmutablePass *
-llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
- return new NVPTXTTI(TM);
-}
-
-bool NVPTXTTI::hasBranchDivergence() const { return true; }
-
-unsigned NVPTXTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -96,8 +30,8 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -109,7 +43,7 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
if (LT.second.SimpleTy == MVT::i64)
return 2 * LT.first;
// Delegate other cases to the basic TTI.
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
}
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
new file mode 100644
index 0000000..bf21e88
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -0,0 +1,74 @@
+//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file a TargetTransformInfo::Concept conforming object specific to the
+/// NVPTX target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
+ typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const NVPTXSubtarget *ST;
+ const NVPTXTargetLowering *TLI;
+
+ const NVPTXSubtarget *getST() const { return ST; };
+ const NVPTXTargetLowering *getTLI() const { return TLI; };
+
+public:
+ explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ bool hasBranchDivergence() { return true; }
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 5caa8bd..cf1feac 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -15,16 +15,16 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <string>
#include <vector>
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -52,7 +52,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
+ ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -75,7 +75,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *elem = NMD->getOperand(i);
- Value *entity = elem->getOperand(0);
+ GlobalValue *entity =
+ mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
// entity may be null due to DCE
if (!entity)
continue;
@@ -322,7 +323,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
if ((v >> 16) == index) {
align = v & 0xFFFF;
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
index 775df19..85aa34e 100644
--- a/lib/Target/NVPTX/NVPTXVector.td
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -661,7 +661,7 @@ class ShuffleAsmStr4<string type>
string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
}
-let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
(ins V4F32Regs:$src1, V4F32Regs:$src2,
i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
@@ -847,7 +847,7 @@ class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
!strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
[], sop>;
-let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
VecInstType=isVecOther.Value in {
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;