aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h1
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp10
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h2
-rw-r--r--lib/Target/NVPTX/NVPTX.h6
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp211
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h31
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp436
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp59
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp33
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h6
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp46
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td3
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td40
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp225
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp48
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp24
19 files changed, 955 insertions, 236 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7da2fed..a8293da 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -23,6 +23,8 @@ set(NVPTXCodeGen_sources
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
NVVMReflect.cpp
+ NVPTXGenericToNVVM.cpp
+ NVPTXPrologEpilogPass.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index b3e8b5d..edf4a80 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -22,7 +22,6 @@ namespace llvm {
enum AddressSpace {
ADDRESS_SPACE_GENERIC = 0,
ADDRESS_SPACE_GLOBAL = 1,
- ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
ADDRESS_SPACE_SHARED = 3,
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 459cd96..dfa1ff5 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -17,17 +17,15 @@
using namespace llvm;
-bool CompileForDebugging;
-
// -debug-compile - Command line option to inform opt and llc passes to
// compile for debugging
-static cl::opt<bool, true>
-Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
- cl::location(CompileForDebugging), cl::init(false));
+static cl::opt<bool> CompileForDebugging("debug-compile",
+ cl::desc("Compile for debugging"),
+ cl::Hidden, cl::init(false));
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 82097da..7d1633f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -23,7 +23,7 @@ class StringRef;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit NVPTXMCAsmInfo(const StringRef &TT);
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 6a53a44..179dc27 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
@@ -26,6 +27,7 @@
namespace llvm {
class NVPTXTargetMachine;
class FunctionPass;
+class MachineFunctionPass;
class formatted_raw_ostream;
namespace NVPTXCC {
@@ -62,6 +64,10 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+ModulePass *createGenericToNVVMPass();
+ModulePass *createNVVMReflectPass();
+ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
+MachineFunctionPass *createNVPTXPrologEpilogPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ce5d78a..ff73931 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+void DiscoverDependentGlobals(const Value *V,
+ DenseSet<const GlobalVariable *> &Globals) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
- if (User *U = dyn_cast<User>(V)) {
+ if (const User *U = dyn_cast<User>(V)) {
for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
DiscoverDependentGlobals(U->getOperand(i), Globals);
}
@@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
/// instances to be emitted, but only after any dependents have been added
/// first.
void VisitGlobalVariableForEmission(
- GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
- DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
+ const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
+ DenseSet<const GlobalVariable *> &Visited,
+ DenseSet<const GlobalVariable *> &Visiting) {
// Have we already visited this one?
if (Visited.count(GV))
return;
@@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission(
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable *> Others;
+ DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
- E = Others.end();
+ for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
@@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
+ if (!GlobalsEmitted) {
+ emitGlobals(*MF->getFunction()->getParent());
+ GlobalsEmitted = true;
+ }
+
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
@@ -429,9 +436,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
}
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- unsigned numRegClasses = TRI.getNumRegClasses();
- VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
+ VRegMapping.clear();
OutStreamer.EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
@@ -443,7 +448,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
OutStreamer.EmitRawText(StringRef("}\n"));
- delete[] VRidGlobal2LocalMap;
+ VRegMapping.clear();
}
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -500,9 +505,8 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
raw_ostream &O) {
const TargetRegisterClass *RC = MRI->getRegClass(vr);
- unsigned id = RC->getID();
- std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
+ DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
unsigned mapped_vr = regmap[vr];
if (!isVec) {
@@ -695,7 +699,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
else
O << ".func ";
printReturnValStr(F, O);
- O << *CurrentFnSym << "\n";
+ O << *Mang->getSymbol(F) << "\n";
emitFunctionParamList(F, O);
O << ";\n";
}
@@ -795,7 +799,7 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
@@ -805,7 +809,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
continue;
if (F->getIntrinsicID())
continue;
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
continue;
}
@@ -817,14 +820,12 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
// The use is in the initialization of a global variable
// that is a function pointer, so print a declaration
// for the original function
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
// Emit a declaration of this function if the function that
// uses this constant expr has already been seen.
if (useFuncSeen(C, seenMap)) {
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
@@ -844,7 +845,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
// appearing in the module before the callee. so print out
// a declaration for the callee.
if (seenMap.find(caller) != seenMap.end()) {
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
@@ -909,7 +909,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(OutContext, *TM.getDataLayout());
+ Mang = new Mangler(OutContext, &TM);
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -921,6 +921,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
+ GlobalsEmitted = false;
+
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitGlobals(const Module &M) {
SmallString<128> Str2;
raw_svector_ostream OS2(Str2);
@@ -931,13 +937,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable *, 8> Globals;
- DenseSet<GlobalVariable *> GVVisited;
- DenseSet<GlobalVariable *> GVVisiting;
+ SmallVector<const GlobalVariable *, 8> Globals;
+ DenseSet<const GlobalVariable *> GVVisited;
+ DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
- ++I)
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
@@ -951,7 +957,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
@@ -989,6 +994,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
+
+ // If we did not emit any functions, then the global declarations have not
+ // yet been emitted.
+ if (!GlobalsEmitted) {
+ emitGlobals(M);
+ GlobalsEmitted = true;
+ }
+
// XXX Temproarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@@ -1063,7 +1076,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
}
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
+ raw_ostream &O,
bool processDemoted) {
// Skip meta data
@@ -1107,10 +1121,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
- Constant *Initializer = NULL;
+ const Constant *Initializer = NULL;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
- ConstantInt *CI = NULL;
+ const ConstantInt *CI = NULL;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@@ -1183,7 +1197,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
- std::vector<GlobalVariable *> temp;
+ std::vector<const GlobalVariable *> temp;
temp.push_back(GVar);
localDecls[demotedFunc] = temp;
}
@@ -1199,17 +1213,20 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
O << " .";
- O << getPTXFundamentalTypeStr(ETy, false);
+ // Special case: ABI requires that we use .u8 for predicates
+ if (ETy->isIntegerTy(1))
+ O << "u8";
+ else
+ O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
O << *Mang->getSymbol(GVar);
// Ptx allows variable initilization only for constant and global state
// spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
O << " = ";
printScalarConstant(Initializer, O);
@@ -1230,10 +1247,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
// Ptx allows variable initilization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
@@ -1283,7 +1299,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
if (localDecls.find(f) == localDecls.end())
return;
- std::vector<GlobalVariable *> &gvars = localDecls[f];
+ std::vector<const GlobalVariable *> &gvars = localDecls[f];
for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
@@ -1301,14 +1317,6 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
O << "global";
break;
case llvm::ADDRESS_SPACE_CONST:
- // This logic should be consistent with that in
- // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
- if (nvptxSubtarget.hasGenericLdSt())
- O << "global";
- else
- O << "const";
- break;
- case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
O << "const";
break;
case llvm::ADDRESS_SPACE_SHARED:
@@ -1448,7 +1456,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
(nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *CurrentFnSym << "_param_" << paramIndex;
+ O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
else {
std::string argName = I->getName();
const char *p = argName.c_str();
@@ -1507,11 +1515,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (llvm::isImage(*I)) {
std::string sname = I->getName();
if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+ O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
+ << paramIndex;
else // Default image is read_only
- O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+ O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
+ << paramIndex;
} else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+ O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
<< paramIndex;
continue;
}
@@ -1546,14 +1556,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
default:
O << ".ptr ";
break;
- case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
+ case llvm::ADDRESS_SPACE_CONST:
O << ".ptr .const ";
break;
case llvm::ADDRESS_SPACE_SHARED:
O << ".ptr .shared ";
break;
case llvm::ADDRESS_SPACE_GLOBAL:
- case llvm::ADDRESS_SPACE_CONST:
O << ".ptr .global ";
break;
}
@@ -1564,7 +1573,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// non-pointer scalar to kernel func
- O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
+ O << "\t.param .";
+ // Special case: predicate operands become .u8 types
+ if (Ty->isIntegerTy(1))
+ O << "u8";
+ else
+ O << getPTXFundamentalTypeStr(Ty);
+ O << " ";
printParamName(I, paramIndex, O);
continue;
}
@@ -1680,48 +1695,36 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
for (unsigned i = 0; i < numVRs; i++) {
unsigned int vr = TRI->index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
- std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
+ DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
int n = regmap.size();
regmap.insert(std::make_pair(vr, n + 1));
}
// Emit register declarations
// @TODO: Extract out the real register usage
- O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
- O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
// Emit declaration of the virtual registers or 'physical' registers for
// each register class
- //for (unsigned i=0; i< numRegClasses; i++) {
- // std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
- // const TargetRegisterClass *RC = TRI->getRegClass(i);
- // std::string rcname = getNVPTXRegClassName(RC);
- // std::string rcStr = getNVPTXRegClassStr(RC);
- // //int n = regmap.size();
- // if (!isNVPTXVectorRegClass(RC)) {
- // O << "\t.reg " << rcname << " \t" << rcStr << "<"
- // << NVPTXNumRegisters << ">;\n";
- // }
-
- // Only declare those registers that may be used. And do not emit vector
- // registers as
- // they are all elementized to scalar registers.
- //if (n && !isNVPTXVectorRegClass(RC)) {
- // if (RegAllocNilUsed) {
- // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
- // << ">;\n";
- // }
- // else {
- // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
- // << "<" << 32 << ">;\n";
- // }
- //}
- //}
+ for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
+ const TargetRegisterClass *RC = TRI->getRegClass(i);
+ DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
+ std::string rcname = getNVPTXRegClassName(RC);
+ std::string rcStr = getNVPTXRegClassStr(RC);
+ int n = regmap.size();
+
+ // Only declare those registers that may be used.
+ if (n) {
+ O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
+ << ">;\n";
+ }
+ }
OutStreamer.EmitRawText(O.str());
}
@@ -1751,12 +1754,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
O << utohexstr(API.getZExtValue());
}
-void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
O << CI->getValue();
return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
printFPConstant(CFP, O);
return;
}
@@ -1764,13 +1767,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
O << "0";
return;
}
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
O << *Mang->getSymbol(GVar);
return;
}
- if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
O << *Mang->getSymbol(GVar);
return;
} else {
@@ -1781,7 +1784,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
@@ -1809,13 +1812,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
@@ -1831,13 +1834,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
llvm_unreachable("unsupported integer const type");
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
@@ -1858,7 +1861,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
case Type::FloatTyID:
case Type::DoubleTyID: {
- ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
@@ -1874,10 +1877,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
break;
}
case Type::PointerTyID: {
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
@@ -1906,7 +1909,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
-void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
int Bytes;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6dc9fc0..55f2943 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<Value *, 4> Symbols;
+ SmallVector<const Value *, 4> Symbols;
private:
unsigned curpos;
@@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
- void addSymbol(Value *GVar) {
+ void addSymbol(const Value *GVar) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
numSymbols++;
@@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
- Value *v = Symbols[nSym];
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ const Value *v = Symbols[nSym];
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
@@ -205,10 +205,12 @@ private:
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
+ void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
+ bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
+ void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
@@ -234,12 +236,16 @@ protected:
private:
std::string CurrentBankselLabelInBasicBlock;
+ bool GlobalsEmitted;
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
// MachineFunction. But the size of the
// array is not.
- std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
+ typedef DenseMap<unsigned, unsigned> VRegMap;
+ typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
+ VRegRCMap VRegMapping;
// cache the subtarget here.
const NVPTXSubtarget &nvptxSubtarget;
// Build the map between type name and ID based on module's type
@@ -247,7 +253,7 @@ private:
std::map<const Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
- std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+ std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
// To record filename to ID mapping
std::map<std::string, unsigned> filenameMap;
@@ -256,15 +262,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
- void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+ void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
- void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -277,7 +283,6 @@ public:
: AsmPrinter(TM, Streamer),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
- VRidGlobal2LocalMap = NULL;
reader = NULL;
}
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
new file mode 100644
index 0000000..1077c46
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -0,0 +1,436 @@
+//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Convert generic global variables into either .global or .const access based
+// on the variable's "constant" qualifier.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeGenericToNVVMPass(PassRegistry &);
+}
+
+namespace {
+class GenericToNVVM : public ModulePass {
+public:
+ static char ID;
+
+ GenericToNVVM() : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+private:
+ Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
+ IRBuilder<> &Builder);
+ Value *remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
+ Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder);
+ void remapNamedMDNode(Module *M, NamedMDNode *N);
+ MDNode *remapMDNode(Module *M, MDNode *N);
+
+ typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
+ typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
+ GVMapTy GVMap;
+ ConstantToValueMapTy ConstantToValueMap;
+};
+}
+
+char GenericToNVVM::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
+
+INITIALIZE_PASS(
+ GenericToNVVM, "generic-to-nvvm",
+ "Ensure that the global variables are in the global address space", false,
+ false)
+
+bool GenericToNVVM::runOnModule(Module &M) {
+ // Create a clone of each global variable that has the default address space.
+ // The clone is created with the global address space specifier, and the pair
+ // of original global variable and its clone is placed in the GVMap for later
+ // use.
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+ !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
+ !GV->getName().startswith("llvm.")) {
+ GlobalVariable *NewGV = new GlobalVariable(
+ M, GV->getType()->getElementType(), GV->isConstant(),
+ GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+ "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+ NewGV->copyAttributesFrom(GV);
+ GVMap[GV] = NewGV;
+ }
+ }
+
+ // Return immediately, if every global variable has a specific address space
+ // specifier.
+ if (GVMap.empty()) {
+ return false;
+ }
+
+ // Walk through the instructions in function defitinions, and replace any use
+ // of original global variables in GVMap with a use of the corresponding
+ // copies in GVMap. If necessary, promote constants to instructions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->isDeclaration()) {
+ continue;
+ }
+ IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
+ for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
+ ++BBI) {
+ for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
+ ++II) {
+ for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
+ Value *Operand = II->getOperand(i);
+ if (isa<Constant>(Operand)) {
+ II->setOperand(
+ i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ }
+ }
+ }
+ }
+ ConstantToValueMap.clear();
+ }
+
+ // Walk through the metadata section and update the debug information
+ // associated with the global variables in the default address space.
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end();
+ I != E; I++) {
+ remapNamedMDNode(&M, I);
+ }
+
+ // Walk through the global variable initializers, and replace any use of
+ // original global variables in GVMap with a use of the corresponding copies
+ // in GVMap. The copies need to be bitcast to the original global variable
+ // types, as we cannot use cvta in global variable initializers.
+ for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
+ GlobalVariable *GV = I->first;
+ GlobalVariable *NewGV = I->second;
+ ++I;
+ Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+ // At this point, the remaining uses of GV should be found only in global
+ // variable initializers, as other uses have been already been removed
+ // while walking through the instructions in function definitions.
+ for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
+ UI != UE;) {
+ Use &U = (UI++).getUse();
+ U.set(BitCastNewGV);
+ }
+ std::string Name = GV->getName();
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ NewGV->setName(Name);
+ }
+ GVMap.clear();
+
+ return true;
+}
+
+Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
+ GlobalVariable *GV,
+ IRBuilder<> &Builder) {
+ PointerType *GVType = GV->getType();
+ Value *CVTA = NULL;
+
+ // See if the address space conversion requires the operand to be bitcast
+ // to i8 addrspace(n)* first.
+ EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true);
+ if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
+ // A bitcast to i8 addrspace(n)* on the operand is needed.
+ LLVMContext &Context = M->getContext();
+ unsigned int AddrSpace = GVType->getAddressSpace();
+ Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
+ CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
+ // Insert the address space conversion.
+ Type *ResultType =
+ PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(ResultType);
+ ParamTypes.push_back(DestTy);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
+ // Another bitcast from i8 * to <the element type of GVType> * is
+ // required.
+ DestTy =
+ PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC);
+ CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
+ } else {
+ // A simple CVTA is enough.
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(PointerType::get(GVType->getElementType(),
+ llvm::ADDRESS_SPACE_GENERIC));
+ ParamTypes.push_back(GVType);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
+ }
+
+ return CVTA;
+}
+
+Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder) {
+ // If the constant C has been converted already in the given function F, just
+ // return the converted value.
+ ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C);
+ if (CTII != ConstantToValueMap.end()) {
+ return CTII->second;
+ }
+
+ Value *NewValue = C;
+ if (isa<GlobalVariable>(C)) {
+ // If the constant C is a global variable and is found in GVMap, generate a
+ // set set of instructions that convert the clone of C with the global
+ // address space specifier to a generic pointer.
+ // The constant C cannot be used here, as it will be erased from the
+ // module eventually. And the clone of C with the global address space
+ // specifier cannot be used here either, as it will affect the types of
+ // other instructions in the function. Hence, this address space conversion
+ // is required.
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
+ if (I != GVMap.end()) {
+ NewValue = getOrInsertCVTA(M, F, I->second, Builder);
+ }
+ } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) ||
+ isa<ConstantStruct>(C)) {
+ // If any element in the constant vector or aggregate C is or uses a global
+ // variable in GVMap, the constant C needs to be reconstructed, using a set
+ // of instructions.
+ NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder);
+ } else if (isa<ConstantExpr>(C)) {
+ // If any operand in the constant expression C is or uses a global variable
+ // in GVMap, the constant expression C needs to be reconstructed, using a
+ // set of instructions.
+ NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder);
+ }
+
+ ConstantToValueMap[C] = NewValue;
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
+ Module *M, Function *F, Constant *C, IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any element is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the elements has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the elements has been modified, construct the equivalent
+ // vector or aggregate value with a set instructions and the converted
+ // elements.
+ Value *NewValue = UndefValue::get(C->getType());
+ if (isa<ConstantVector>(C)) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
+ NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx);
+ }
+ } else {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ NewValue =
+ Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i));
+ }
+ }
+
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any operand is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the operands has been modified, construct the instruction with
+ // the converted operands.
+ unsigned Opcode = C->getOpcode();
+ switch (Opcode) {
+ case Instruction::ICmp:
+ // CompareConstantExpr (icmp)
+ return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()),
+ NewOperands[0], NewOperands[1]);
+ case Instruction::FCmp:
+ // CompareConstantExpr (fcmp)
+ assert(false && "Address space conversion should have no effect "
+ "on float point CompareConstantExpr (fcmp)!");
+ return C;
+ case Instruction::ExtractElement:
+ // ExtractElementConstantExpr
+ return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
+ case Instruction::InsertElement:
+ // InsertElementConstantExpr
+ return Builder.CreateInsertElement(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ShuffleVector:
+ // ShuffleVector
+ return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ExtractValue:
+ // ExtractValueConstantExpr
+ return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
+ case Instruction::InsertValue:
+ // InsertValueConstantExpr
+ return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
+ C->getIndices());
+ case Instruction::GetElementPtr:
+ // GetElementPtrConstantExpr
+ return cast<GEPOperator>(C)->isInBounds()
+ ? Builder.CreateGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1))
+ : Builder.CreateInBoundsGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1));
+ case Instruction::Select:
+ // SelectConstantExpr
+ return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
+ default:
+ // BinaryConstantExpr
+ if (Instruction::isBinaryOp(Opcode)) {
+ return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()),
+ NewOperands[0], NewOperands[1]);
+ }
+ // UnaryConstantExpr
+ if (Instruction::isCast(Opcode)) {
+ return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
+ NewOperands[0], C->getType());
+ }
+ assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
+ return C;
+ }
+}
+
+void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<MDNode *, 16> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ MDNode *Operand = N->getOperand(i);
+ MDNode *NewOperand = remapMDNode(M, Operand);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return immediately.
+ if (!OperandChanged) {
+ return;
+ }
+
+ // Replace the old operands with the new operands.
+ N->dropAllReferences();
+ for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(),
+ E = NewOperands.end();
+ I != E; ++I) {
+ N->addOperand(*I);
+ }
+}
+
+MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<Value *, 8> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = N->getOperand(i);
+ Value *NewOperand = Operand;
+ if (Operand) {
+ if (isa<GlobalVariable>(Operand)) {
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
+ if (I != GVMap.end()) {
+ NewOperand = I->second;
+ if (++i < NumOperands) {
+ NewOperands.push_back(NewOperand);
+ // Address space of the global variable follows the global variable
+ // in the global variable debug info (see createGlobalVariable in
+ // lib/Analysis/DIBuilder.cpp).
+ NewOperand =
+ ConstantInt::get(Type::getInt32Ty(M->getContext()),
+ I->second->getType()->getAddressSpace());
+ }
+ }
+ } else if (isa<MDNode>(Operand)) {
+ NewOperand = remapMDNode(M, cast<MDNode>(Operand));
+ }
+ }
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return N as it is.
+ if (!OperandChanged) {
+ return N;
+ }
+
+ // If any of the operands has been modified, create a new MDNode with the new
+ // operands.
+ return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
+}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0f4c8db..ac6dbb9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
+static cl::opt<bool>
+UsePrecSqrtF32("nvptx-prec-sqrtf32",
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
// Decide how to translate f32 div
do_DIVF32_PREC = UsePrecDivF32;
+ // Decide how to translate f32 sqrt
+ do_SQRTF32_PREC = UsePrecSqrtF32;
// sm less than sm_20 does not support div.rnd. Use div.full.
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
do_DIVF32_PREC = 1;
@@ -120,42 +127,26 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getSrcValue();
+
if (!Src)
- return NVPTX::PTXLdStInstCode::LOCAL;
+ return NVPTX::PTXLdStInstCode::GENERIC;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
- case llvm::ADDRESS_SPACE_LOCAL:
- return NVPTX::PTXLdStInstCode::LOCAL;
- case llvm::ADDRESS_SPACE_GLOBAL:
- return NVPTX::PTXLdStInstCode::GLOBAL;
- case llvm::ADDRESS_SPACE_SHARED:
- return NVPTX::PTXLdStInstCode::SHARED;
- case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
- return NVPTX::PTXLdStInstCode::CONSTANT;
- case llvm::ADDRESS_SPACE_GENERIC:
- return NVPTX::PTXLdStInstCode::GENERIC;
- case llvm::ADDRESS_SPACE_PARAM:
- return NVPTX::PTXLdStInstCode::PARAM;
- case llvm::ADDRESS_SPACE_CONST:
- // If the arch supports generic address space, translate it to GLOBAL
- // for correctness.
- // If the arch does not support generic address space, then the arch
- // does not really support ADDRESS_SPACE_CONST, translate it to
- // to CONSTANT for better performance.
- if (Subtarget.hasGenericLdSt())
- return NVPTX::PTXLdStInstCode::GLOBAL;
- else
- return NVPTX::PTXLdStInstCode::CONSTANT;
- default:
- break;
+ case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
+ default: break;
}
}
- return NVPTX::PTXLdStInstCode::LOCAL;
+ return NVPTX::PTXLdStInstCode::GENERIC;
}
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
SDNode *NVPTXLD = NULL;
@@ -198,7 +189,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ // Read at least 8 bits (predicates are stored as 8-bit values)
+ unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -394,7 +386,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT LoadedVT = MemSD->getMemoryVT();
@@ -423,7 +415,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ // Read at least 8 bits (predicates are stored as 8-bit values)
+ unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int FromType;
// The last operand holds the original LoadSDNode::getExtensionType() value
unsigned ExtensionType = cast<ConstantSDNode>(
@@ -775,7 +768,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *LD;
EVT RetVT = N->getValueType(0);
@@ -972,7 +965,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
SDNode *NVPTXST = NULL;
@@ -1207,7 +1200,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *ST;
EVT EltVT = Op1.getValueType();
MemSDNode *MemSD = cast<MemSDNode>(N);
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 70e8e46..ed16d44 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
// Otherwise, use div.full
int do_DIVF32_PREC;
+ // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
+ // is true, then generate the corresponding FTZ version.
+ bool do_SQRTF32_PREC;
+
// If true, add .ftz to f32 instructions.
// This is only meaningful for sm_20 and later, as the default
// is not ftz.
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6e01a5a..6cc850e 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -275,7 +275,7 @@ bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
@@ -435,7 +435,7 @@ std::string NVPTXTargetLowering::getPrototype(
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+ SDLoc dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
@@ -449,8 +449,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
SDValue tempChain = Chain;
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, true),
+ dl);
SDValue InFlag = Chain.getValue(1);
assert((Outs.size() == Args.size()) &&
@@ -795,7 +796,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
DAG.getIntPtrConstant(uniqueCallSite + 1, true),
- InFlag);
+ InFlag, dl);
uniqueCallSite++;
// set isTailCall to false for now, until we figure out how to express
@@ -810,7 +811,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue
NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
SmallVector<SDValue, 8> Ops;
unsigned NumOperands = Node->getNumOperands();
for (unsigned i = 0; i < NumOperands; ++i) {
@@ -866,7 +867,7 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
@@ -896,7 +897,7 @@ SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
SDNode *N = Op.getNode();
SDValue Val = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT ValVT = Val.getValueType();
if (ValVT.isVector()) {
@@ -985,7 +986,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// st i8, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
StoreSDNode *ST = cast<StoreSDNode>(Node);
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
@@ -1046,7 +1047,7 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout *TD = getDataLayout();
@@ -1145,14 +1146,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
false,
TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx + 1);
+ p.getNode()->setIROrder(idx + 1);
InVals.push_back(p);
} else {
// If no ABI, just move the param symbol
SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx + 1);
+ p.getNode()->setIROrder(idx + 1);
InVals.push_back(p);
}
continue;
@@ -1169,7 +1170,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx + 1);
+ p.getNode()->setIROrder(idx + 1);
if (isKernel)
InVals.push_back(p);
else {
@@ -1240,7 +1241,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue NVPTXTargetLowering::LowerReturn(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
SelectionDAG &DAG) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
@@ -1450,7 +1451,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
assert(ResVT.isVector() && "Vector load must have vector type");
@@ -1543,7 +1544,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Intrin = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
// Get the intrinsic ID
unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 3cd49d3..d3ed63a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -100,7 +100,7 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- virtual EVT getSetCCResultType(EVT VT) const {
+ virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const {
if (VT.isVector())
return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1;
@@ -112,7 +112,7 @@ public:
virtual SDValue LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -125,7 +125,7 @@ public:
virtual SDValue
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
SelectionDAG &DAG) const;
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 33a63c2..52be287 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -32,36 +32,36 @@ NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
- if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
- NVPTX::Int32RegsRegClass.contains(SrcReg))
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+
+ if (DestRC != SrcRC)
+ report_fatal_error("Attempted to created cross-class register copy");
+
+ if (DestRC == &NVPTX::Int32RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
- NVPTX::Int8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
- NVPTX::Int1RegsRegClass.contains(SrcReg))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Int1RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
- NVPTX::Float32RegsRegClass.contains(SrcReg))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Float32RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
- NVPTX::Int16RegsRegClass.contains(SrcReg))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Int16RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
- NVPTX::Int64RegsRegClass.contains(SrcReg))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Int8RegsRegClass)
+ BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Int64RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
- NVPTX::Float64RegsRegClass.contains(SrcReg))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestRC == &NVPTX::Float64RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else {
- llvm_unreachable("Don't know how to copy a register");
+ llvm_unreachable("Bad register copy");
}
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index f43abe2..da6dd39 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
+def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
+
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 49e2568..24037ca 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_sqrt_rp_d>;
+// nvvm_sqrt intrinsic
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+
//
// Rsqrt
//
@@ -1510,38 +1520,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
-
-def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
-def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
-
-
-
-// @TODO: Revisit this. There is a type
-// contradiction between iPTRAny and iPTR for the def.
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;
-def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;*/
-
-
-def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
-def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
// nvvm.ptr.gen.to.param
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
new file mode 100644
index 0000000..843ebed
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -0,0 +1,225 @@
+//===-- NVPTXPrologEpilogPass.cpp - NVPTX prolog/epilog inserter ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a copy of the generic LLVM PrologEpilogInserter pass, modified
+// to remove unneeded functionality and to handle virtual registers. Most code
+// here is a copy of PrologEpilogInserter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXPrologEpilogPass : public MachineFunctionPass {
+public:
+ static char ID;
+ NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+};
+}
+
+MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
+ return new NVPTXPrologEpilogPass();
+}
+
+char NVPTXPrologEpilogPass::ID = 0;
+
+bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ bool Modified = false;
+
+ calculateFrameObjectOffsets(MF);
+
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
+ TRI.eliminateFrameIndex(MI, 0, i, NULL);
+ Modified = true;
+ }
+ }
+ }
+
+ // Add function prolog/epilog
+ TFI.emitPrologue(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().isReturn())
+ TFI.emitEpilogue(MF, *I);
+ }
+
+ return Modified;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
+
+void
+NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at lower address of
+ // the object -- which is given by offset. For down growing stack
+ // the offset is negative, so we negate the offset to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the start pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // NOTE: We do not have a call stack
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // No scavenger
+
+ // FIXME: Once this is working, then enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // No stack protector
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ }
+
+ // No scavenger
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 2824653..bb039f8 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -57,9 +57,9 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
return "%f";
}
if (RC == &NVPTX::Float64RegsRegClass) {
- return "%fd";
+ return "%fl";
} else if (RC == &NVPTX::Int64RegsRegClass) {
- return "%rd";
+ return "%rl";
} else if (RC == &NVPTX::Int32RegsRegClass) {
return "%r";
} else if (RC == &NVPTX::Int16RegsRegClass) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 67ca6b5..72afe8d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -49,6 +49,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
+void initializeGenericToNVVMPass(PassRegistry&);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+ initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(
@@ -72,7 +74,9 @@ NVPTXTargetMachine::NVPTXTargetMachine(
Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(
- *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
+ *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
+ initAsmInfo();
+}
void NVPTXTargetMachine32::anchor() {}
@@ -90,7 +94,7 @@ NVPTXTargetMachine64::NVPTXTargetMachine64(
CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-namespace llvm {
+namespace {
class NVPTXPassConfig : public TargetPassConfig {
public:
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
@@ -100,16 +104,36 @@ public:
return getTM<NVPTXTargetMachine>();
}
+ virtual void addIRPasses();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+
+ virtual FunctionPass *createTargetRegisterAllocator(bool) LLVM_OVERRIDE;
+ virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
+ virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
};
-}
+} // end anonymous namespace
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
return PassConfig;
}
+void NVPTXPassConfig::addIRPasses() {
+ // The following passes are known to not play well with virtual regs hanging
+ // around after register allocation (which in our case, is *all* registers).
+ // We explicitly disable them here. We do, however, need some functionality
+ // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
+ // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
+ disablePass(&PrologEpilogCodeInserterID);
+ disablePass(&MachineCopyPropagationID);
+ disablePass(&BranchFolderPassID);
+
+ TargetPassConfig::addIRPasses();
+ addPass(createGenericToNVVMPass());
+}
+
bool NVPTXPassConfig::addInstSelector() {
addPass(createLowerAggrCopies());
addPass(createSplitBBatBarPass());
@@ -119,3 +143,21 @@ bool NVPTXPassConfig::addInstSelector() {
}
bool NVPTXPassConfig::addPreRegAlloc() { return false; }
+bool NVPTXPassConfig::addPostRegAlloc() {
+ addPass(createNVPTXPrologEpilogPass());
+ return false;
+}
+
+FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
+ return 0; // No reg alloc
+}
+
+void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ assert(!RegAllocPass && "NVPTX uses no regalloc!");
+ addPass(&StrongPHIEliminationID);
+}
+
+void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ assert(!RegAllocPass && "NVPTX uses no regalloc!");
+ addPass(&StrongPHIEliminationID);
+}
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 0ad62ce..3cc324b 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTX.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -40,7 +41,7 @@ using namespace llvm;
namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
-class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+class NVVMReflect : public ModulePass {
private:
StringMap<int> VarMap;
typedef DenseMap<std::string, int>::iterator VarMapIter;
@@ -48,9 +49,18 @@ private:
public:
static char ID;
- NVVMReflect() : ModulePass(ID) {
+ NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
- ReflectFunction = 0;
+ }
+
+ NVVMReflect(const StringMap<int> &Mapping)
+ : ModulePass(ID), ReflectFunction(0) {
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+ for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
+ I != E; ++I) {
+ VarMap[(*I).getKey()] = (*I).getValue();
+ }
}
void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
@@ -60,6 +70,14 @@ public:
};
}
+ModulePass *llvm::createNVVMReflectPass() {
+ return new NVVMReflect();
+}
+
+ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) {
+ return new NVVMReflect(Mapping);
+}
+
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
cl::desc("NVVM reflection, enabled by default"));