aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp')
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp59
1 files changed, 26 insertions, 33 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0f4c8db..ac6dbb9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
+static cl::opt<bool>
+UsePrecSqrtF32("nvptx-prec-sqrtf32",
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
// Decide how to translate f32 div
do_DIVF32_PREC = UsePrecDivF32;
+ // Decide how to translate f32 sqrt
+ do_SQRTF32_PREC = UsePrecSqrtF32;
// sm less than sm_20 does not support div.rnd. Use div.full.
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
do_DIVF32_PREC = 1;
@@ -120,42 +127,26 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getSrcValue();
+
if (!Src)
- return NVPTX::PTXLdStInstCode::LOCAL;
+ return NVPTX::PTXLdStInstCode::GENERIC;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
- case llvm::ADDRESS_SPACE_LOCAL:
- return NVPTX::PTXLdStInstCode::LOCAL;
- case llvm::ADDRESS_SPACE_GLOBAL:
- return NVPTX::PTXLdStInstCode::GLOBAL;
- case llvm::ADDRESS_SPACE_SHARED:
- return NVPTX::PTXLdStInstCode::SHARED;
- case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
- return NVPTX::PTXLdStInstCode::CONSTANT;
- case llvm::ADDRESS_SPACE_GENERIC:
- return NVPTX::PTXLdStInstCode::GENERIC;
- case llvm::ADDRESS_SPACE_PARAM:
- return NVPTX::PTXLdStInstCode::PARAM;
- case llvm::ADDRESS_SPACE_CONST:
- // If the arch supports generic address space, translate it to GLOBAL
- // for correctness.
- // If the arch does not support generic address space, then the arch
- // does not really support ADDRESS_SPACE_CONST, translate it to
- // to CONSTANT for better performance.
- if (Subtarget.hasGenericLdSt())
- return NVPTX::PTXLdStInstCode::GLOBAL;
- else
- return NVPTX::PTXLdStInstCode::CONSTANT;
- default:
- break;
+ case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
+ default: break;
}
}
- return NVPTX::PTXLdStInstCode::LOCAL;
+ return NVPTX::PTXLdStInstCode::GENERIC;
}
SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
SDNode *NVPTXLD = NULL;
@@ -198,7 +189,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ // Read at least 8 bits (predicates are stored as 8-bit values)
+ unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -394,7 +386,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT LoadedVT = MemSD->getMemoryVT();
@@ -423,7 +415,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ // Read at least 8 bits (predicates are stored as 8-bit values)
+ unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int FromType;
// The last operand holds the original LoadSDNode::getExtensionType() value
unsigned ExtensionType = cast<ConstantSDNode>(
@@ -775,7 +768,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *LD;
EVT RetVT = N->getValueType(0);
@@ -972,7 +965,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
SDNode *NVPTXST = NULL;
@@ -1207,7 +1200,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
unsigned Opcode;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
SDNode *ST;
EVT EltVT = Op1.getValueType();
MemSDNode *MemSD = cast<MemSDNode>(N);