aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp50
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.cpp40
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h28
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp34
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h4
-rw-r--r--lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp1
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp11
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp15
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h6
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp35
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h29
-rw-r--r--lib/Target/NVPTX/NVPTXLowerStructArgs.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.h4
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h3
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h5
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp12
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h7
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h3
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp9
-rw-r--r--lib/Target/NVPTX/NVPTXutil.cpp90
-rw-r--r--lib/Target/NVPTX/NVPTXutil.h25
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp38
28 files changed, 179 insertions, 290 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 3a4a19d..cdd2f1f 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -29,7 +29,6 @@ set(NVPTXCodeGen_sources
NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
- NVPTXutil.cpp
NVVMReflect.cpp
)
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
InlineAsmEnd = " inline asm";
SupportsDebugInformation = CompileForDebugging;
+ // PTX does not allow .align on functions.
+ HasFunctionAlignment = false;
HasDotTypeDotSizeDirective = false;
Data8bitsDirective = " .b8 ";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..2b4d864 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -71,35 +71,23 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
- createNVPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
- createNVPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
- createNVPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
- createNVPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
- createNVPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
- createNVPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
- createNVPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
- createNVPTXMCInstPrinter);
+ for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+ }
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#include <stdint.h>
+
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+using namespace llvm;
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ const char *getPassName() const override {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ bool runOnFunction(Function &function) override;
+};
+} // namespace
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
return functionModified;
}
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+ NVPTXAllocaHoisting, "alloca-hoisting",
+ "Hoisting alloca instructions in non-entry blocks to the entry block",
+ false, false)
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
-
class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
- static char ID; // Pass ID
- NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- const char *getPassName() const override {
- return "NVPTX specific alloca hoisting";
- }
-
- bool runOnFunction(Function &function) override;
-};
extern FunctionPass *createAllocaHoisting();
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 833db04..cc58b07 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -504,8 +504,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo();
- if (TRI->isVirtualRegister(RegNo)) {
+ if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
@@ -522,15 +521,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false)
+ if (!llvm::getReqNTIDx(F, reqntidx))
reqntidx = 1;
else
specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false)
+ if (!llvm::getReqNTIDy(F, reqntidy))
reqntidy = 1;
else
specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false)
+ if (!llvm::getReqNTIDz(F, reqntidz))
reqntidz = 1;
else
specified = true;
@@ -544,15 +543,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ if (!llvm::getMaxNTIDx(F, maxntidx))
maxntidx = 1;
else
specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ if (!llvm::getMaxNTIDy(F, maxntidy))
maxntidy = 1;
else
specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ if (!llvm::getMaxNTIDz(F, maxntidz))
maxntidz = 1;
else
specified = true;
@@ -673,7 +672,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
}
for (const User *UU : U->users())
- if (usedInOneFunc(UU, oneFunc) == false)
+ if (!usedInOneFunc(UU, oneFunc))
return false;
return true;
@@ -687,7 +686,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (gv->hasInternalLinkage() == false)
+ if (!gv->hasInternalLinkage())
return false;
const PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -696,7 +695,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
- if (flag == false)
+ if (!flag)
return false;
if (!oneFunc)
return false;
@@ -1472,7 +1471,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1788,7 +1787,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1810,7 +1809,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -2085,13 +2084,6 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
LineReader *reader = this->getReader(filename.str());
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7e6b5e8..9b11e70 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -92,8 +92,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
bool EmitGeneric;
public:
- AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : size(_size), buffer(_size), O(_O), AP(_AP) {
+ AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), O(O), AP(AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..6d7c99c 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,10 +123,9 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
// =>
// %0 = gep X, indices
// %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
- Indices,
- GEP->getName(),
- GEPI);
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+ GEP->getName(), GEPI);
NewGEPI->setIsInBounds(GEP->isInBounds());
GEP->replaceAllUsesWith(
new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 86d134b..850c020 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -343,6 +343,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// GetElementPtrConstantExpr
return cast<GEPOperator>(C)->isInBounds()
? Builder.CreateGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e01c780..52c5e1b 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -78,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
@@ -5044,12 +5041,12 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default:
return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ca432b5..6d845c9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -48,7 +48,7 @@ public:
const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 1dc81f7..ff74e6e 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -930,7 +930,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
}
first = false;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
@@ -1075,7 +1075,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
@@ -1459,7 +1459,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (NumElts == 1) {
// Just a simple load
@@ -1577,7 +1577,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1940,9 +1940,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
}
// Then any remaining arguments
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.append(N->op_begin() + 2, N->op_end());
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
@@ -2118,7 +2116,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
@@ -4494,7 +4492,6 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfLocSection;
delete DwarfARangesSection;
delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
}
const MCSection *
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 1b4da2c..8594364 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -497,6 +497,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f0c3663..578401a 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -22,10 +24,33 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "nvptx"
using namespace llvm;
-namespace llvm { FunctionPass *createLowerAggrCopies(); }
+namespace {
+// actual analysis class, which is a functionpass
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ const char *getPassName() const override {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+} // namespace
char NVPTXLowerAggrCopies::ID = 0;
@@ -104,7 +129,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<MemTransferInst *, 4> aggrMemcpys;
SmallVector<MemSetInst *, 4> aggrMemsets;
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
//
@@ -117,10 +142,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false)
+ if (!load->hasOneUse())
continue;
- if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
continue;
User *use = load->user_back();
@@ -166,7 +191,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL->getTypeStoreSize(load->getType());
+ unsigned numLoads = DL.getTypeStoreSize(load->getType());
Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index da301d5..3c39f53 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
+class FunctionPass;
-// actual analysis class, which is a functionpass
-struct NVPTXLowerAggrCopies : public FunctionPass {
- static char ID;
-
- NVPTXLowerAggrCopies() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- bool runOnFunction(Function &F) override;
-
- static const unsigned MaxAggrCopySize = 128;
-
- const char *getPassName() const override {
- return "Lower aggregate copies/intrinsics into loops";
- }
-};
-
-extern FunctionPass *createLowerAggrCopies();
+FunctionPass *createLowerAggrCopies();
}
#endif
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
index 3149399..68dfbb7 100644
--- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -35,7 +35,8 @@ namespace llvm {
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
-class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+namespace {
+class NVPTXLowerStructArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
void handleStructPtrArgs(Function &);
@@ -48,6 +49,7 @@ public:
return "Copy structure (byval *) arguments to stack";
}
};
+} // namespace
char NVPTXLowerStructArgs::ID = 1;
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index d39a394..f075b8b 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -29,8 +29,8 @@ private:
const VariantKind Kind;
const APFloat Flt;
- explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt)
- : Kind(_Kind), Flt(_Flt) {}
+ explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt)
+ : Kind(Kind), Flt(Flt) {}
public:
/// @name Construction
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 5ca96e4..6e97f9ef 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -78,7 +78,7 @@ NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
/// NVPTX Callee Saved Registers
const MCPhysReg *
-NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 75b8f15..c310a9c 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -35,8 +35,7 @@ public:
//------------------------------------------------------
// NVPTX callee saved registers
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1d3cb4..0d2627d 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -26,7 +26,7 @@ namespace llvm {
class NVPTXSection : public MCSection {
virtual void anchor();
public:
- NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
@@ -36,11 +36,8 @@ public:
const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- bool isBaseAddressKnownZero() const override { return true; }
bool UseCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
- std::string getLabelBeginName() const override { return ""; }
- std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 1a267a6..1b6bc71 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -50,6 +50,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
@@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
@@ -86,9 +88,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
if (Triple(TT).getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
@@ -183,8 +186,7 @@ void NVPTXPassConfig::addIRPasses() {
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a81abfe..b8df5af 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -27,7 +27,6 @@ namespace llvm {
class NVPTXTargetMachine : public LLVMTargetMachine {
bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
@@ -40,8 +39,10 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
bool is64Bit() const { return is64bit; }
NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 00ceca5..5d9ab0d 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,7 +41,6 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
- DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -83,8 +82,6 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
const MCSection *getSectionForConstant(SectionKind Kind,
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index cf1feac..1f178af 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
bool retval = llvm::findOneNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
- if (retval == false) {
+ if (!retval) {
// There is no NVVM metadata, check the calling convention
- if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
- return true;
- else
- return false;
+ return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
}
return (x == 1);
}
@@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
- if (retval == false)
+ if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
deleted file mode 100644
index 5f074b3..0000000
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTXutil.h"
-#include "NVPTX.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-bool isParamLoad(const MachineInstr *MI) {
- if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
- (MI->getOpcode() != NVPTX::LD_i64_avar))
- return false;
- if (MI->getOperand(2).isImm() == false)
- return false;
- if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
- return false;
- return true;
-}
-
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
-
-static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
- char *a;
- char *end = space + splen;
-
- a = space;
- do {
- unsigned char uc;
-
- if (a >= end)
- return 1;
- uc = val & DATA_MASK;
- val >>= DIGIT_WIDTH;
- if (val != 0)
- uc |= MORE_BYTES;
- *a = uc;
- a++;
- } while (val);
- *nbytes = a - space;
- return 0;
-}
-
-#undef DATA_MASK
-#undef DIGIT_WIDTH
-#undef MORE_BYTES
-
-uint64_t encode_leb128(const char *str) {
- union {
- uint64_t x;
- char a[8];
- } temp64;
-
- temp64.x = 0;
-
- for (unsigned i = 0, e = strlen(str); i != e; ++i)
- temp64.a[i] = str[e - 1 - i];
-
- char encoded[16];
- int nbytes;
-
- int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
-
- (void) retval;
- assert(retval == 0 && "Encoding to leb128 failed");
-
- assert(nbytes <= 8 &&
- "Cannot support register names with leb128 encoding > 8 bytes");
-
- temp64.x = 0;
- for (int i = 0; i < nbytes; ++i)
- temp64.a[i] = encoded[i];
-
- return temp64.x;
-}
-
-} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
deleted file mode 100644
index 1915dac..0000000
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
-bool isParamLoad(const MachineInstr *);
-uint64_t encode_leb128(const char *str);
-}
-
-#endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index a8d6b95..5e375b7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <map>
#include <sstream>
@@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.
+
+ // IR for __nvvm_reflect calls differs between CUDA versions:
+ // CUDA 6.5 and earlier uses this sequence:
+ // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
+ // (i8 addrspace(4)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ //
+ // Value returned by Sym->getOperand(0) is a Constant with a
+ // ConstantDataSequential operand which can be converted to string and used
+ // for lookup.
+ //
+ // CUDA 7.0 does it slightly differently:
+ // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
+ // (i8 addrspace(1)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ //
+ // In this case, we get a Constant with a GlobalVariable operand and we need
+ // to dig deeper to find its initializer with the string we'll use for lookup.
+
for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);
@@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
- const Constant *SymStr = cast<Constant>(Sym);
+ const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
+ // For CUDA-7.0 style __nvvm_reflect calls we need to find the operand's
+ // initializer.
+ assert(GV->hasInitializer() &&
+ "Format of _reflect function not recognized");
+ const Constant *Initializer = GV->getInitializer();
+ Operand = Initializer;
+ }
- assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ assert(isa<ConstantDataSequential>(Operand) &&
"Format of _reflect function not recognized");
-
- assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");
std::string ReflectArg =
- cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+ cast<ConstantDataSequential>(Operand)->getAsString();
ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");