From 37ed9c199ca639565f6ce88105f9e39e898d82d0 Mon Sep 17 00:00:00 2001 From: Stephen Hines Date: Mon, 1 Dec 2014 14:51:49 -0800 Subject: Update aosp/master LLVM for rebase to r222494. Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 1529 ++++++++++++++++++++++++++++---- 1 file changed, 1358 insertions(+), 171 deletions(-) (limited to 'lib/Target/NVPTX/NVPTXISelLowering.cpp') diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index cb452ff..0b0b536 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -48,6 +48,12 @@ static cl::opt sched4reg( "nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); +static cl::opt +FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); + static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { default: @@ -100,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, } // NVPTXTargetLowering Constructor. -NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) - : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) + : TargetLowering(TM), nvTM(&TM), nvptxSubtarget(TM.getSubtarget()) { // always lower memset, memcpy, and memmove intrinsics to load/store @@ -197,8 +203,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); // Turn FP extload into load/fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers @@ -360,73 +369,379 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::MUL_WIDE_SIGNED"; case NVPTXISD::MUL_WIDE_UNSIGNED: return "NVPTXISD::MUL_WIDE_UNSIGNED"; - case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32"; + case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; case NVPTXISD::Tex1DFloatFloatLevel: return "NVPTXISD::Tex1DFloatFloatLevel"; case NVPTXISD::Tex1DFloatFloatGrad: return "NVPTXISD::Tex1DFloatFloatGrad"; - case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32"; - case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float"; - case NVPTXISD::Tex1DI32FloatLevel: - return "NVPTXISD::Tex1DI32FloatLevel"; - case NVPTXISD::Tex1DI32FloatGrad: - return "NVPTXISD::Tex1DI32FloatGrad"; - case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; - case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; + case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; + case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; + case NVPTXISD::Tex1DS32FloatLevel: + return "NVPTXISD::Tex1DS32FloatLevel"; + case NVPTXISD::Tex1DS32FloatGrad: + return "NVPTXISD::Tex1DS32FloatGrad"; + case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; + case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; + case NVPTXISD::Tex1DU32FloatLevel: + return "NVPTXISD::Tex1DU32FloatLevel"; + case NVPTXISD::Tex1DU32FloatGrad: + return "NVPTXISD::Tex1DU32FloatGrad"; + case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; + case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; case NVPTXISD::Tex1DArrayFloatFloatLevel: - return "NVPTXISD::Tex2DArrayFloatFloatLevel"; + return "NVPTXISD::Tex1DArrayFloatFloatLevel"; case NVPTXISD::Tex1DArrayFloatFloatGrad: - return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex1DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex1DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32"; + return "NVPTXISD::Tex1DArrayFloatFloatGrad"; + case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; + case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; + case NVPTXISD::Tex1DArrayS32FloatLevel: + return "NVPTXISD::Tex1DArrayS32FloatLevel"; + case NVPTXISD::Tex1DArrayS32FloatGrad: + return "NVPTXISD::Tex1DArrayS32FloatGrad"; + case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; + case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; + case NVPTXISD::Tex1DArrayU32FloatLevel: + return "NVPTXISD::Tex1DArrayU32FloatLevel"; + case NVPTXISD::Tex1DArrayU32FloatGrad: + return "NVPTXISD::Tex1DArrayU32FloatGrad"; + case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; case NVPTXISD::Tex2DFloatFloatLevel: return "NVPTXISD::Tex2DFloatFloatLevel"; case NVPTXISD::Tex2DFloatFloatGrad: return "NVPTXISD::Tex2DFloatFloatGrad"; - case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32"; - case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float"; - case NVPTXISD::Tex2DI32FloatLevel: - return "NVPTXISD::Tex2DI32FloatLevel"; - case NVPTXISD::Tex2DI32FloatGrad: - return "NVPTXISD::Tex2DI32FloatGrad"; - case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; + case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; + case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; + case NVPTXISD::Tex2DS32FloatLevel: + return "NVPTXISD::Tex2DS32FloatLevel"; + case NVPTXISD::Tex2DS32FloatGrad: + return "NVPTXISD::Tex2DS32FloatGrad"; + case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; + case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; + case NVPTXISD::Tex2DU32FloatLevel: + return "NVPTXISD::Tex2DU32FloatLevel"; + case NVPTXISD::Tex2DU32FloatGrad: + return "NVPTXISD::Tex2DU32FloatGrad"; + case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; case NVPTXISD::Tex2DArrayFloatFloatLevel: return "NVPTXISD::Tex2DArrayFloatFloatLevel"; case NVPTXISD::Tex2DArrayFloatFloatGrad: return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex2DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex2DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32"; + case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; + case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; + case NVPTXISD::Tex2DArrayS32FloatLevel: + return "NVPTXISD::Tex2DArrayS32FloatLevel"; + case NVPTXISD::Tex2DArrayS32FloatGrad: + return "NVPTXISD::Tex2DArrayS32FloatGrad"; + case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; + case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; + case NVPTXISD::Tex2DArrayU32FloatLevel: + return "NVPTXISD::Tex2DArrayU32FloatLevel"; + case NVPTXISD::Tex2DArrayU32FloatGrad: + return "NVPTXISD::Tex2DArrayU32FloatGrad"; + case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; case NVPTXISD::Tex3DFloatFloatLevel: return "NVPTXISD::Tex3DFloatFloatLevel"; case NVPTXISD::Tex3DFloatFloatGrad: return "NVPTXISD::Tex3DFloatFloatGrad"; - case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32"; - case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float"; - case NVPTXISD::Tex3DI32FloatLevel: - return "NVPTXISD::Tex3DI32FloatLevel"; - case NVPTXISD::Tex3DI32FloatGrad: - return "NVPTXISD::Tex3DI32FloatGrad"; + case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; + case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; + case NVPTXISD::Tex3DS32FloatLevel: + return "NVPTXISD::Tex3DS32FloatLevel"; + case NVPTXISD::Tex3DS32FloatGrad: + return "NVPTXISD::Tex3DS32FloatGrad"; + case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; + case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; + case NVPTXISD::Tex3DU32FloatLevel: + return "NVPTXISD::Tex3DU32FloatLevel"; + case NVPTXISD::Tex3DU32FloatGrad: + return "NVPTXISD::Tex3DU32FloatGrad"; + case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; + case NVPTXISD::TexCubeFloatFloatLevel: + return "NVPTXISD::TexCubeFloatFloatLevel"; + case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; + case NVPTXISD::TexCubeS32FloatLevel: + return "NVPTXISD::TexCubeS32FloatLevel"; + case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; + case NVPTXISD::TexCubeU32FloatLevel: + return "NVPTXISD::TexCubeU32FloatLevel"; + case NVPTXISD::TexCubeArrayFloatFloat: + return "NVPTXISD::TexCubeArrayFloatFloat"; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + return "NVPTXISD::TexCubeArrayFloatFloatLevel"; + case NVPTXISD::TexCubeArrayS32Float: + return "NVPTXISD::TexCubeArrayS32Float"; + case NVPTXISD::TexCubeArrayS32FloatLevel: + return "NVPTXISD::TexCubeArrayS32FloatLevel"; + case NVPTXISD::TexCubeArrayU32Float: + return "NVPTXISD::TexCubeArrayU32Float"; + case NVPTXISD::TexCubeArrayU32FloatLevel: + return "NVPTXISD::TexCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4R2DFloatFloat: + return "NVPTXISD::Tld4R2DFloatFloat"; + case NVPTXISD::Tld4G2DFloatFloat: + return "NVPTXISD::Tld4G2DFloatFloat"; + case NVPTXISD::Tld4B2DFloatFloat: + return "NVPTXISD::Tld4B2DFloatFloat"; + case NVPTXISD::Tld4A2DFloatFloat: + return "NVPTXISD::Tld4A2DFloatFloat"; + case NVPTXISD::Tld4R2DS64Float: + return "NVPTXISD::Tld4R2DS64Float"; + case NVPTXISD::Tld4G2DS64Float: + return "NVPTXISD::Tld4G2DS64Float"; + case NVPTXISD::Tld4B2DS64Float: + return "NVPTXISD::Tld4B2DS64Float"; + case NVPTXISD::Tld4A2DS64Float: + return "NVPTXISD::Tld4A2DS64Float"; + case NVPTXISD::Tld4R2DU64Float: + return "NVPTXISD::Tld4R2DU64Float"; + case NVPTXISD::Tld4G2DU64Float: + return "NVPTXISD::Tld4G2DU64Float"; + case NVPTXISD::Tld4B2DU64Float: + return "NVPTXISD::Tld4B2DU64Float"; + case NVPTXISD::Tld4A2DU64Float: + return "NVPTXISD::Tld4A2DU64Float"; + + case NVPTXISD::TexUnified1DFloatS32: + return "NVPTXISD::TexUnified1DFloatS32"; + case NVPTXISD::TexUnified1DFloatFloat: + return "NVPTXISD::TexUnified1DFloatFloat"; + case NVPTXISD::TexUnified1DFloatFloatLevel: + return "NVPTXISD::TexUnified1DFloatFloatLevel"; + case NVPTXISD::TexUnified1DFloatFloatGrad: + return "NVPTXISD::TexUnified1DFloatFloatGrad"; + case NVPTXISD::TexUnified1DS32S32: + return "NVPTXISD::TexUnified1DS32S32"; + case NVPTXISD::TexUnified1DS32Float: + return "NVPTXISD::TexUnified1DS32Float"; + case NVPTXISD::TexUnified1DS32FloatLevel: + return "NVPTXISD::TexUnified1DS32FloatLevel"; + case NVPTXISD::TexUnified1DS32FloatGrad: + return "NVPTXISD::TexUnified1DS32FloatGrad"; + case NVPTXISD::TexUnified1DU32S32: + return "NVPTXISD::TexUnified1DU32S32"; + case NVPTXISD::TexUnified1DU32Float: + return "NVPTXISD::TexUnified1DU32Float"; + case NVPTXISD::TexUnified1DU32FloatLevel: + return "NVPTXISD::TexUnified1DU32FloatLevel"; + case NVPTXISD::TexUnified1DU32FloatGrad: + return "NVPTXISD::TexUnified1DU32FloatGrad"; + case NVPTXISD::TexUnified1DArrayFloatS32: + return "NVPTXISD::TexUnified1DArrayFloatS32"; + case NVPTXISD::TexUnified1DArrayFloatFloat: + return "NVPTXISD::TexUnified1DArrayFloatFloat"; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified1DArrayS32S32: + return "NVPTXISD::TexUnified1DArrayS32S32"; + case NVPTXISD::TexUnified1DArrayS32Float: + return "NVPTXISD::TexUnified1DArrayS32Float"; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; + case NVPTXISD::TexUnified1DArrayU32S32: + return "NVPTXISD::TexUnified1DArrayU32S32"; + case NVPTXISD::TexUnified1DArrayU32Float: + return "NVPTXISD::TexUnified1DArrayU32Float"; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; + case NVPTXISD::TexUnified2DFloatS32: + return "NVPTXISD::TexUnified2DFloatS32"; + case NVPTXISD::TexUnified2DFloatFloat: + return "NVPTXISD::TexUnified2DFloatFloat"; + case NVPTXISD::TexUnified2DFloatFloatLevel: + return "NVPTXISD::TexUnified2DFloatFloatLevel"; + case NVPTXISD::TexUnified2DFloatFloatGrad: + return "NVPTXISD::TexUnified2DFloatFloatGrad"; + case NVPTXISD::TexUnified2DS32S32: + return "NVPTXISD::TexUnified2DS32S32"; + case NVPTXISD::TexUnified2DS32Float: + return "NVPTXISD::TexUnified2DS32Float"; + case NVPTXISD::TexUnified2DS32FloatLevel: + return "NVPTXISD::TexUnified2DS32FloatLevel"; + case NVPTXISD::TexUnified2DS32FloatGrad: + return "NVPTXISD::TexUnified2DS32FloatGrad"; + case NVPTXISD::TexUnified2DU32S32: + return "NVPTXISD::TexUnified2DU32S32"; + case NVPTXISD::TexUnified2DU32Float: + return "NVPTXISD::TexUnified2DU32Float"; + case NVPTXISD::TexUnified2DU32FloatLevel: + return "NVPTXISD::TexUnified2DU32FloatLevel"; + case NVPTXISD::TexUnified2DU32FloatGrad: + return "NVPTXISD::TexUnified2DU32FloatGrad"; + case NVPTXISD::TexUnified2DArrayFloatS32: + return "NVPTXISD::TexUnified2DArrayFloatS32"; + case NVPTXISD::TexUnified2DArrayFloatFloat: + return "NVPTXISD::TexUnified2DArrayFloatFloat"; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified2DArrayS32S32: + return "NVPTXISD::TexUnified2DArrayS32S32"; + case NVPTXISD::TexUnified2DArrayS32Float: + return "NVPTXISD::TexUnified2DArrayS32Float"; + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; + case NVPTXISD::TexUnified2DArrayU32S32: + return "NVPTXISD::TexUnified2DArrayU32S32"; + case NVPTXISD::TexUnified2DArrayU32Float: + return "NVPTXISD::TexUnified2DArrayU32Float"; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; + case NVPTXISD::TexUnified3DFloatS32: + return "NVPTXISD::TexUnified3DFloatS32"; + case NVPTXISD::TexUnified3DFloatFloat: + return "NVPTXISD::TexUnified3DFloatFloat"; + case NVPTXISD::TexUnified3DFloatFloatLevel: + return "NVPTXISD::TexUnified3DFloatFloatLevel"; + case NVPTXISD::TexUnified3DFloatFloatGrad: + return "NVPTXISD::TexUnified3DFloatFloatGrad"; + case NVPTXISD::TexUnified3DS32S32: + return "NVPTXISD::TexUnified3DS32S32"; + case NVPTXISD::TexUnified3DS32Float: + return "NVPTXISD::TexUnified3DS32Float"; + case NVPTXISD::TexUnified3DS32FloatLevel: + return "NVPTXISD::TexUnified3DS32FloatLevel"; + case NVPTXISD::TexUnified3DS32FloatGrad: + return "NVPTXISD::TexUnified3DS32FloatGrad"; + case NVPTXISD::TexUnified3DU32S32: + return "NVPTXISD::TexUnified3DU32S32"; + case NVPTXISD::TexUnified3DU32Float: + return "NVPTXISD::TexUnified3DU32Float"; + case NVPTXISD::TexUnified3DU32FloatLevel: + return "NVPTXISD::TexUnified3DU32FloatLevel"; + case NVPTXISD::TexUnified3DU32FloatGrad: + return "NVPTXISD::TexUnified3DU32FloatGrad"; + case NVPTXISD::TexUnifiedCubeFloatFloat: + return "NVPTXISD::TexUnifiedCubeFloatFloat"; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeS32Float: + return "NVPTXISD::TexUnifiedCubeS32Float"; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeU32Float: + return "NVPTXISD::TexUnifiedCubeU32Float"; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + return "NVPTXISD::TexUnifiedCubeArrayS32Float"; + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + return "NVPTXISD::TexUnifiedCubeArrayU32Float"; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; + case NVPTXISD::Tld4UnifiedR2DS64Float: + return "NVPTXISD::Tld4UnifiedR2DS64Float"; + case NVPTXISD::Tld4UnifiedG2DS64Float: + return "NVPTXISD::Tld4UnifiedG2DS64Float"; + case NVPTXISD::Tld4UnifiedB2DS64Float: + return "NVPTXISD::Tld4UnifiedB2DS64Float"; + case NVPTXISD::Tld4UnifiedA2DS64Float: + return "NVPTXISD::Tld4UnifiedA2DS64Float"; + case NVPTXISD::Tld4UnifiedR2DU64Float: + return "NVPTXISD::Tld4UnifiedR2DU64Float"; + case NVPTXISD::Tld4UnifiedG2DU64Float: + return "NVPTXISD::Tld4UnifiedG2DU64Float"; + case NVPTXISD::Tld4UnifiedB2DU64Float: + return "NVPTXISD::Tld4UnifiedB2DU64Float"; + case NVPTXISD::Tld4UnifiedA2DU64Float: + return "NVPTXISD::Tld4UnifiedA2DU64Float"; + + case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; + case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; + case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; + case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; + case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; + case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; + case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; + case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; + case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; + case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; + case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; + + case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; + case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; + case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; + case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; + case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; + case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; + case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; + case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; + case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; + case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; + case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; + + case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; + case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; + case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; + case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; + case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; + case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; + case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; + case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; + case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; + case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; + case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; + + case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; + case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; + case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; + case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; + case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; + case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; + case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; + case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; + case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; + case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; + case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; + + case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; + case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; + case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; + case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; + case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; + case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; + case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; + case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; + case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; + case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; + case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; @@ -434,9 +749,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; @@ -444,9 +761,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; @@ -454,9 +773,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; @@ -464,12 +785,74 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; + + case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; + case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; + case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; + case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; + case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; + case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; + case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; + case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; + case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; + case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; + case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; + + case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; + case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; + case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; + case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; + case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; + case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; + case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; + case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; + case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; + case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; + case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; + + case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; + case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; + case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; + case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; + case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; + case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; + case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; + case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; + case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; + case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; + case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; + + case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; + case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; + case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; + case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; + case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; + case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; + case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; + case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; + case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; + case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; + case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; + + case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; + case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; + case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; + case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; + case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; + case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; + case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; + case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; + case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; + case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; + case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; } } @@ -972,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[], or // .param .b retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isSingleValueType()) { + // Emit ".param .b retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. + if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -1068,8 +1456,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), - ObjectVT) == NumElts && + assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( + F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); bool needTruncate = sz < 8 ? true : false; @@ -1494,6 +1882,21 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { break; } + MemSDNode *MemSD = cast(N); + const DataLayout *TD = getDataLayout(); + + unsigned Align = MemSD->getAlignment(); + unsigned PrefAlign = + TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This store is not sufficiently aligned, so bail out and let this vector + // store be scalarized. Note that we may still be able to emit smaller + // vector stores. For example, if we are storing a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. + return SDValue(); + } + unsigned Opcode = 0; EVT EltVT = ValVT.getVectorElementType(); unsigned NumElts = ValVT.getVectorNumElements(); @@ -1536,8 +1939,6 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { Ops.push_back(N->getOperand(i)); } - MemSDNode *MemSD = cast(N); - SDValue NewSt = DAG.getMemIntrinsicNode( Opcode, DL, DAG.getVTList(MVT::Other), Ops, MemSD->getMemoryVT(), MemSD->getMemOperand()); @@ -1632,7 +2033,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = DAG.getTarget().getTargetLowering(); + const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector OutChains; @@ -1746,7 +2147,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, MachinePointerInfo(srcValue), partVT, false, - false, partAlign); + false, false, partAlign); } else { p = DAG.getLoad(partVT, dl, Root, srcAddr, MachinePointerInfo(srcValue), false, false, false, @@ -1767,7 +2168,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); - unsigned Ofst = 0; EVT EltVT = ObjectVT.getVectorElementType(); // V1 load @@ -1776,10 +2176,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // We only have one element, so just directly load it Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -1788,7 +2186,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); InVals.push_back(P); - Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); ++InsIdx; } else if (NumElts == 2) { // V2 load @@ -1796,10 +2193,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -1817,7 +2212,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( InVals.push_back(Elt0); InVals.push_back(Elt1); - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); InsIdx += 2; } else { // V4 loads @@ -1835,6 +2229,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( VecSize = 2; } EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); + unsigned Ofst = 0; for (unsigned i = 0; i < NumElts; i += VecSize) { Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), @@ -1879,6 +2274,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), ObjectVT, false, false, + false, TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, @@ -2132,90 +2528,357 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) { default: return 0; - case Intrinsic::nvvm_tex_1d_v4f32_i32: - return NVPTXISD::Tex1DFloatI32; + case Intrinsic::nvvm_tex_1d_v4f32_s32: + return NVPTXISD::Tex1DFloatS32; case Intrinsic::nvvm_tex_1d_v4f32_f32: return NVPTXISD::Tex1DFloatFloat; case Intrinsic::nvvm_tex_1d_level_v4f32_f32: return NVPTXISD::Tex1DFloatFloatLevel; case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: return NVPTXISD::Tex1DFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_v4i32_i32: - return NVPTXISD::Tex1DI32I32; - case Intrinsic::nvvm_tex_1d_v4i32_f32: - return NVPTXISD::Tex1DI32Float; - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - return NVPTXISD::Tex1DI32FloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - return NVPTXISD::Tex1DI32FloatGrad; - - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: - return NVPTXISD::Tex1DArrayFloatI32; + case Intrinsic::nvvm_tex_1d_v4s32_s32: + return NVPTXISD::Tex1DS32S32; + case Intrinsic::nvvm_tex_1d_v4s32_f32: + return NVPTXISD::Tex1DS32Float; + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + return NVPTXISD::Tex1DS32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + return NVPTXISD::Tex1DS32FloatGrad; + case Intrinsic::nvvm_tex_1d_v4u32_s32: + return NVPTXISD::Tex1DU32S32; + case Intrinsic::nvvm_tex_1d_v4u32_f32: + return NVPTXISD::Tex1DU32Float; + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + return NVPTXISD::Tex1DU32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + return NVPTXISD::Tex1DU32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: + return NVPTXISD::Tex1DArrayFloatS32; case Intrinsic::nvvm_tex_1d_array_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloat; case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - return NVPTXISD::Tex1DArrayI32I32; - case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - return NVPTXISD::Tex1DArrayI32Float; - case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_v4f32_i32: - return NVPTXISD::Tex2DFloatI32; + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + return NVPTXISD::Tex1DArrayS32S32; + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + return NVPTXISD::Tex1DArrayS32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + return NVPTXISD::Tex1DArrayU32S32; + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + return NVPTXISD::Tex1DArrayU32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_s32: + return NVPTXISD::Tex2DFloatS32; case Intrinsic::nvvm_tex_2d_v4f32_f32: return NVPTXISD::Tex2DFloatFloat; case Intrinsic::nvvm_tex_2d_level_v4f32_f32: return NVPTXISD::Tex2DFloatFloatLevel; case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: return NVPTXISD::Tex2DFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_v4i32_i32: - return NVPTXISD::Tex2DI32I32; - case Intrinsic::nvvm_tex_2d_v4i32_f32: - return NVPTXISD::Tex2DI32Float; - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - return NVPTXISD::Tex2DI32FloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - return NVPTXISD::Tex2DI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: - return NVPTXISD::Tex2DArrayFloatI32; + case Intrinsic::nvvm_tex_2d_v4s32_s32: + return NVPTXISD::Tex2DS32S32; + case Intrinsic::nvvm_tex_2d_v4s32_f32: + return NVPTXISD::Tex2DS32Float; + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + return NVPTXISD::Tex2DS32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + return NVPTXISD::Tex2DS32FloatGrad; + case Intrinsic::nvvm_tex_2d_v4u32_s32: + return NVPTXISD::Tex2DU32S32; + case Intrinsic::nvvm_tex_2d_v4u32_f32: + return NVPTXISD::Tex2DU32Float; + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + return NVPTXISD::Tex2DU32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + return NVPTXISD::Tex2DU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: + return NVPTXISD::Tex2DArrayFloatS32; case Intrinsic::nvvm_tex_2d_array_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloat; case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - return NVPTXISD::Tex2DArrayI32I32; - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - return NVPTXISD::Tex2DArrayI32Float; - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_3d_v4f32_i32: - return NVPTXISD::Tex3DFloatI32; + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + return NVPTXISD::Tex2DArrayS32S32; + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + return NVPTXISD::Tex2DArrayS32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + return NVPTXISD::Tex2DArrayU32S32; + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + return NVPTXISD::Tex2DArrayU32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_3d_v4f32_s32: + return NVPTXISD::Tex3DFloatS32; case Intrinsic::nvvm_tex_3d_v4f32_f32: return NVPTXISD::Tex3DFloatFloat; case Intrinsic::nvvm_tex_3d_level_v4f32_f32: return NVPTXISD::Tex3DFloatFloatLevel; case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: return NVPTXISD::Tex3DFloatFloatGrad; - case Intrinsic::nvvm_tex_3d_v4i32_i32: - return NVPTXISD::Tex3DI32I32; - case Intrinsic::nvvm_tex_3d_v4i32_f32: - return NVPTXISD::Tex3DI32Float; - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - return NVPTXISD::Tex3DI32FloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: - return NVPTXISD::Tex3DI32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4s32_s32: + return NVPTXISD::Tex3DS32S32; + case Intrinsic::nvvm_tex_3d_v4s32_f32: + return NVPTXISD::Tex3DS32Float; + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + return NVPTXISD::Tex3DS32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + return NVPTXISD::Tex3DS32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4u32_s32: + return NVPTXISD::Tex3DU32S32; + case Intrinsic::nvvm_tex_3d_v4u32_f32: + return NVPTXISD::Tex3DU32Float; + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + return NVPTXISD::Tex3DU32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + return NVPTXISD::Tex3DU32FloatGrad; + + case Intrinsic::nvvm_tex_cube_v4f32_f32: + return NVPTXISD::TexCubeFloatFloat; + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + return NVPTXISD::TexCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_v4s32_f32: + return NVPTXISD::TexCubeS32Float; + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + return NVPTXISD::TexCubeS32FloatLevel; + case Intrinsic::nvvm_tex_cube_v4u32_f32: + return NVPTXISD::TexCubeU32Float; + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + return NVPTXISD::TexCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + return NVPTXISD::TexCubeArrayS32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + return NVPTXISD::TexCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + return NVPTXISD::TexCubeArrayU32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + return NVPTXISD::TexCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + return NVPTXISD::Tld4R2DFloatFloat; + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + return NVPTXISD::Tld4G2DFloatFloat; + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + return NVPTXISD::Tld4B2DFloatFloat; + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + return NVPTXISD::Tld4A2DFloatFloat; + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + return NVPTXISD::Tld4R2DS64Float; + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + return NVPTXISD::Tld4G2DS64Float; + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + return NVPTXISD::Tld4B2DS64Float; + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + return NVPTXISD::Tld4A2DS64Float; + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + return NVPTXISD::Tld4R2DU64Float; + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + return NVPTXISD::Tld4G2DU64Float; + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + return NVPTXISD::Tld4B2DU64Float; + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + return NVPTXISD::Tld4A2DU64Float; + + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + return NVPTXISD::TexUnified1DFloatS32; + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + return NVPTXISD::TexUnified1DS32S32; + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + return NVPTXISD::TexUnified1DS32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + return NVPTXISD::TexUnified1DU32S32; + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + return NVPTXISD::TexUnified1DU32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + return NVPTXISD::TexUnified1DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + return NVPTXISD::TexUnified1DArrayS32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + return NVPTXISD::TexUnified1DArrayU32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + return NVPTXISD::TexUnified2DFloatS32; + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + return NVPTXISD::TexUnified2DS32S32; + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + return NVPTXISD::TexUnified2DS32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + return NVPTXISD::TexUnified2DU32S32; + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + return NVPTXISD::TexUnified2DU32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + return NVPTXISD::TexUnified2DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + return NVPTXISD::TexUnified2DArrayS32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + return NVPTXISD::TexUnified2DArrayU32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + return NVPTXISD::TexUnified3DFloatS32; + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloat; + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + return NVPTXISD::TexUnified3DS32S32; + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + return NVPTXISD::TexUnified3DS32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + return NVPTXISD::TexUnified3DU32S32; + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + return NVPTXISD::TexUnified3DU32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedR2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedG2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedB2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedA2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedR2DS64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedG2DS64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedB2DS64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedA2DS64Float; + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedR2DU64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedG2DU64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedB2DU64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedA2DU64Float; } } @@ -2223,18 +2886,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { switch (Intrinsic) { default: return 0; + case Intrinsic::nvvm_suld_1d_i8_clamp: + return NVPTXISD::Suld1DI8Clamp; + case Intrinsic::nvvm_suld_1d_i16_clamp: + return NVPTXISD::Suld1DI16Clamp; + case Intrinsic::nvvm_suld_1d_i32_clamp: + return NVPTXISD::Suld1DI32Clamp; + case Intrinsic::nvvm_suld_1d_i64_clamp: + return NVPTXISD::Suld1DI64Clamp; + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + return NVPTXISD::Suld1DV2I8Clamp; + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + return NVPTXISD::Suld1DV2I16Clamp; + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + return NVPTXISD::Suld1DV2I32Clamp; + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + return NVPTXISD::Suld1DV2I64Clamp; + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + return NVPTXISD::Suld1DV4I8Clamp; + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + return NVPTXISD::Suld1DV4I16Clamp; + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + return NVPTXISD::Suld1DV4I32Clamp; + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + return NVPTXISD::Suld1DArrayI8Clamp; + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + return NVPTXISD::Suld1DArrayI16Clamp; + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + return NVPTXISD::Suld1DArrayI32Clamp; + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + return NVPTXISD::Suld1DArrayI64Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + return NVPTXISD::Suld1DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + return NVPTXISD::Suld1DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + return NVPTXISD::Suld1DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + return NVPTXISD::Suld1DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + return NVPTXISD::Suld1DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + return NVPTXISD::Suld1DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + return NVPTXISD::Suld1DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_2d_i8_clamp: + return NVPTXISD::Suld2DI8Clamp; + case Intrinsic::nvvm_suld_2d_i16_clamp: + return NVPTXISD::Suld2DI16Clamp; + case Intrinsic::nvvm_suld_2d_i32_clamp: + return NVPTXISD::Suld2DI32Clamp; + case Intrinsic::nvvm_suld_2d_i64_clamp: + return NVPTXISD::Suld2DI64Clamp; + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + return NVPTXISD::Suld2DV2I8Clamp; + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + return NVPTXISD::Suld2DV2I16Clamp; + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + return NVPTXISD::Suld2DV2I32Clamp; + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + return NVPTXISD::Suld2DV2I64Clamp; + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + return NVPTXISD::Suld2DV4I8Clamp; + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + return NVPTXISD::Suld2DV4I16Clamp; + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + return NVPTXISD::Suld2DV4I32Clamp; + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + return NVPTXISD::Suld2DArrayI8Clamp; + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + return NVPTXISD::Suld2DArrayI16Clamp; + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + return NVPTXISD::Suld2DArrayI32Clamp; + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + return NVPTXISD::Suld2DArrayI64Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + return NVPTXISD::Suld2DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + return NVPTXISD::Suld2DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + return NVPTXISD::Suld2DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + return NVPTXISD::Suld2DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + return NVPTXISD::Suld2DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + return NVPTXISD::Suld2DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + return NVPTXISD::Suld2DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_3d_i8_clamp: + return NVPTXISD::Suld3DI8Clamp; + case Intrinsic::nvvm_suld_3d_i16_clamp: + return NVPTXISD::Suld3DI16Clamp; + case Intrinsic::nvvm_suld_3d_i32_clamp: + return NVPTXISD::Suld3DI32Clamp; + case Intrinsic::nvvm_suld_3d_i64_clamp: + return NVPTXISD::Suld3DI64Clamp; + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + return NVPTXISD::Suld3DV2I8Clamp; + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + return NVPTXISD::Suld3DV2I16Clamp; + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + return NVPTXISD::Suld3DV2I32Clamp; + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + return NVPTXISD::Suld3DV2I64Clamp; + case Intrinsic::nvvm_suld_3d_v4i8_clamp: + return NVPTXISD::Suld3DV4I8Clamp; + case Intrinsic::nvvm_suld_3d_v4i16_clamp: + return NVPTXISD::Suld3DV4I16Clamp; + case Intrinsic::nvvm_suld_3d_v4i32_clamp: + return NVPTXISD::Suld3DV4I32Clamp; case Intrinsic::nvvm_suld_1d_i8_trap: return NVPTXISD::Suld1DI8Trap; case Intrinsic::nvvm_suld_1d_i16_trap: return NVPTXISD::Suld1DI16Trap; case Intrinsic::nvvm_suld_1d_i32_trap: return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_i64_trap: + return NVPTXISD::Suld1DI64Trap; case Intrinsic::nvvm_suld_1d_v2i8_trap: return NVPTXISD::Suld1DV2I8Trap; case Intrinsic::nvvm_suld_1d_v2i16_trap: return NVPTXISD::Suld1DV2I16Trap; case Intrinsic::nvvm_suld_1d_v2i32_trap: return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v2i64_trap: + return NVPTXISD::Suld1DV2I64Trap; case Intrinsic::nvvm_suld_1d_v4i8_trap: return NVPTXISD::Suld1DV4I8Trap; case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2247,12 +3024,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld1DArrayI16Trap; case Intrinsic::nvvm_suld_1d_array_i32_trap: return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_i64_trap: + return NVPTXISD::Suld1DArrayI64Trap; case Intrinsic::nvvm_suld_1d_array_v2i8_trap: return NVPTXISD::Suld1DArrayV2I8Trap; case Intrinsic::nvvm_suld_1d_array_v2i16_trap: return NVPTXISD::Suld1DArrayV2I16Trap; case Intrinsic::nvvm_suld_1d_array_v2i32_trap: return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + return NVPTXISD::Suld1DArrayV2I64Trap; case Intrinsic::nvvm_suld_1d_array_v4i8_trap: return NVPTXISD::Suld1DArrayV4I8Trap; case Intrinsic::nvvm_suld_1d_array_v4i16_trap: @@ -2265,12 +3046,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DI16Trap; case Intrinsic::nvvm_suld_2d_i32_trap: return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_i64_trap: + return NVPTXISD::Suld2DI64Trap; case Intrinsic::nvvm_suld_2d_v2i8_trap: return NVPTXISD::Suld2DV2I8Trap; case Intrinsic::nvvm_suld_2d_v2i16_trap: return NVPTXISD::Suld2DV2I16Trap; case Intrinsic::nvvm_suld_2d_v2i32_trap: return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v2i64_trap: + return NVPTXISD::Suld2DV2I64Trap; case Intrinsic::nvvm_suld_2d_v4i8_trap: return NVPTXISD::Suld2DV4I8Trap; case Intrinsic::nvvm_suld_2d_v4i16_trap: @@ -2283,12 +3068,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DArrayI16Trap; case Intrinsic::nvvm_suld_2d_array_i32_trap: return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_i64_trap: + return NVPTXISD::Suld2DArrayI64Trap; case Intrinsic::nvvm_suld_2d_array_v2i8_trap: return NVPTXISD::Suld2DArrayV2I8Trap; case Intrinsic::nvvm_suld_2d_array_v2i16_trap: return NVPTXISD::Suld2DArrayV2I16Trap; case Intrinsic::nvvm_suld_2d_array_v2i32_trap: return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + return NVPTXISD::Suld2DArrayV2I64Trap; case Intrinsic::nvvm_suld_2d_array_v4i8_trap: return NVPTXISD::Suld2DArrayV4I8Trap; case Intrinsic::nvvm_suld_2d_array_v4i16_trap: @@ -2301,18 +3090,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld3DI16Trap; case Intrinsic::nvvm_suld_3d_i32_trap: return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_i64_trap: + return NVPTXISD::Suld3DI64Trap; case Intrinsic::nvvm_suld_3d_v2i8_trap: return NVPTXISD::Suld3DV2I8Trap; case Intrinsic::nvvm_suld_3d_v2i16_trap: return NVPTXISD::Suld3DV2I16Trap; case Intrinsic::nvvm_suld_3d_v2i32_trap: return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v2i64_trap: + return NVPTXISD::Suld3DV2I64Trap; case Intrinsic::nvvm_suld_3d_v4i8_trap: return NVPTXISD::Suld3DV4I8Trap; case Intrinsic::nvvm_suld_3d_v4i16_trap: return NVPTXISD::Suld3DV4I16Trap; case Intrinsic::nvvm_suld_3d_v4i32_trap: return NVPTXISD::Suld3DV4I32Trap; + case Intrinsic::nvvm_suld_1d_i8_zero: + return NVPTXISD::Suld1DI8Zero; + case Intrinsic::nvvm_suld_1d_i16_zero: + return NVPTXISD::Suld1DI16Zero; + case Intrinsic::nvvm_suld_1d_i32_zero: + return NVPTXISD::Suld1DI32Zero; + case Intrinsic::nvvm_suld_1d_i64_zero: + return NVPTXISD::Suld1DI64Zero; + case Intrinsic::nvvm_suld_1d_v2i8_zero: + return NVPTXISD::Suld1DV2I8Zero; + case Intrinsic::nvvm_suld_1d_v2i16_zero: + return NVPTXISD::Suld1DV2I16Zero; + case Intrinsic::nvvm_suld_1d_v2i32_zero: + return NVPTXISD::Suld1DV2I32Zero; + case Intrinsic::nvvm_suld_1d_v2i64_zero: + return NVPTXISD::Suld1DV2I64Zero; + case Intrinsic::nvvm_suld_1d_v4i8_zero: + return NVPTXISD::Suld1DV4I8Zero; + case Intrinsic::nvvm_suld_1d_v4i16_zero: + return NVPTXISD::Suld1DV4I16Zero; + case Intrinsic::nvvm_suld_1d_v4i32_zero: + return NVPTXISD::Suld1DV4I32Zero; + case Intrinsic::nvvm_suld_1d_array_i8_zero: + return NVPTXISD::Suld1DArrayI8Zero; + case Intrinsic::nvvm_suld_1d_array_i16_zero: + return NVPTXISD::Suld1DArrayI16Zero; + case Intrinsic::nvvm_suld_1d_array_i32_zero: + return NVPTXISD::Suld1DArrayI32Zero; + case Intrinsic::nvvm_suld_1d_array_i64_zero: + return NVPTXISD::Suld1DArrayI64Zero; + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + return NVPTXISD::Suld1DArrayV2I8Zero; + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + return NVPTXISD::Suld1DArrayV2I16Zero; + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + return NVPTXISD::Suld1DArrayV2I32Zero; + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + return NVPTXISD::Suld1DArrayV2I64Zero; + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + return NVPTXISD::Suld1DArrayV4I8Zero; + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + return NVPTXISD::Suld1DArrayV4I16Zero; + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + return NVPTXISD::Suld1DArrayV4I32Zero; + case Intrinsic::nvvm_suld_2d_i8_zero: + return NVPTXISD::Suld2DI8Zero; + case Intrinsic::nvvm_suld_2d_i16_zero: + return NVPTXISD::Suld2DI16Zero; + case Intrinsic::nvvm_suld_2d_i32_zero: + return NVPTXISD::Suld2DI32Zero; + case Intrinsic::nvvm_suld_2d_i64_zero: + return NVPTXISD::Suld2DI64Zero; + case Intrinsic::nvvm_suld_2d_v2i8_zero: + return NVPTXISD::Suld2DV2I8Zero; + case Intrinsic::nvvm_suld_2d_v2i16_zero: + return NVPTXISD::Suld2DV2I16Zero; + case Intrinsic::nvvm_suld_2d_v2i32_zero: + return NVPTXISD::Suld2DV2I32Zero; + case Intrinsic::nvvm_suld_2d_v2i64_zero: + return NVPTXISD::Suld2DV2I64Zero; + case Intrinsic::nvvm_suld_2d_v4i8_zero: + return NVPTXISD::Suld2DV4I8Zero; + case Intrinsic::nvvm_suld_2d_v4i16_zero: + return NVPTXISD::Suld2DV4I16Zero; + case Intrinsic::nvvm_suld_2d_v4i32_zero: + return NVPTXISD::Suld2DV4I32Zero; + case Intrinsic::nvvm_suld_2d_array_i8_zero: + return NVPTXISD::Suld2DArrayI8Zero; + case Intrinsic::nvvm_suld_2d_array_i16_zero: + return NVPTXISD::Suld2DArrayI16Zero; + case Intrinsic::nvvm_suld_2d_array_i32_zero: + return NVPTXISD::Suld2DArrayI32Zero; + case Intrinsic::nvvm_suld_2d_array_i64_zero: + return NVPTXISD::Suld2DArrayI64Zero; + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + return NVPTXISD::Suld2DArrayV2I8Zero; + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + return NVPTXISD::Suld2DArrayV2I16Zero; + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + return NVPTXISD::Suld2DArrayV2I32Zero; + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + return NVPTXISD::Suld2DArrayV2I64Zero; + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + return NVPTXISD::Suld2DArrayV4I8Zero; + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + return NVPTXISD::Suld2DArrayV4I16Zero; + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + return NVPTXISD::Suld2DArrayV4I32Zero; + case Intrinsic::nvvm_suld_3d_i8_zero: + return NVPTXISD::Suld3DI8Zero; + case Intrinsic::nvvm_suld_3d_i16_zero: + return NVPTXISD::Suld3DI16Zero; + case Intrinsic::nvvm_suld_3d_i32_zero: + return NVPTXISD::Suld3DI32Zero; + case Intrinsic::nvvm_suld_3d_i64_zero: + return NVPTXISD::Suld3DI64Zero; + case Intrinsic::nvvm_suld_3d_v2i8_zero: + return NVPTXISD::Suld3DV2I8Zero; + case Intrinsic::nvvm_suld_3d_v2i16_zero: + return NVPTXISD::Suld3DV2I16Zero; + case Intrinsic::nvvm_suld_3d_v2i32_zero: + return NVPTXISD::Suld3DV2I32Zero; + case Intrinsic::nvvm_suld_3d_v2i64_zero: + return NVPTXISD::Suld3DV2I64Zero; + case Intrinsic::nvvm_suld_3d_v4i8_zero: + return NVPTXISD::Suld3DV4I8Zero; + case Intrinsic::nvvm_suld_3d_v4i16_zero: + return NVPTXISD::Suld3DV4I16Zero; + case Intrinsic::nvvm_suld_3d_v4i32_zero: + return NVPTXISD::Suld3DV4I32Zero; } } @@ -2366,16 +3269,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. - assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast(I.getArgOperand(1))->getZExtValue(); return true; } @@ -2395,42 +3289,69 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. - assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast(I.getArgOperand(1))->getZExtValue(); return true; } - case Intrinsic::nvvm_tex_1d_v4f32_i32: + case Intrinsic::nvvm_tex_1d_v4f32_s32: case Intrinsic::nvvm_tex_1d_v4f32_f32: case Intrinsic::nvvm_tex_1d_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: case Intrinsic::nvvm_tex_1d_array_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_v4f32_i32: + case Intrinsic::nvvm_tex_2d_v4f32_s32: case Intrinsic::nvvm_tex_2d_v4f32_f32: case Intrinsic::nvvm_tex_2d_level_v4f32_f32: case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: case Intrinsic::nvvm_tex_2d_array_v4f32_f32: case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_3d_v4f32_i32: + case Intrinsic::nvvm_tex_3d_v4f32_s32: case Intrinsic::nvvm_tex_3d_v4f32_f32: case Intrinsic::nvvm_tex_3d_level_v4f32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_cube_v4f32_f32: + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::f32; + Info.memVT = MVT::v4f32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2439,28 +3360,120 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - case Intrinsic::nvvm_tex_1d_v4i32_i32: - case Intrinsic::nvvm_tex_1d_v4i32_f32: - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_v4i32_i32: - case Intrinsic::nvvm_tex_2d_v4i32_f32: - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_3d_v4i32_i32: - case Intrinsic::nvvm_tex_3d_v4i32_f32: - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { + case Intrinsic::nvvm_tex_1d_v4s32_s32: + case Intrinsic::nvvm_tex_1d_v4s32_f32: + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_v4s32_s32: + case Intrinsic::nvvm_tex_2d_v4s32_f32: + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_3d_v4s32_s32: + case Intrinsic::nvvm_tex_3d_v4s32_f32: + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4s32_f32: + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4u32_f32: + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_v4u32_s32: + case Intrinsic::nvvm_tex_1d_v4u32_f32: + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_v4u32_s32: + case Intrinsic::nvvm_tex_2d_v4u32_f32: + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_3d_v4u32_s32: + case Intrinsic::nvvm_tex_3d_v4u32_f32: + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::i32; + Info.memVT = MVT::v4i32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2469,6 +3482,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i8_clamp: + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_i8_clamp: + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_3d_i8_clamp: + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + case Intrinsic::nvvm_suld_3d_v4i8_clamp: case Intrinsic::nvvm_suld_1d_i8_trap: case Intrinsic::nvvm_suld_1d_v2i8_trap: case Intrinsic::nvvm_suld_1d_v4i8_trap: @@ -2483,7 +3511,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i8_trap: case Intrinsic::nvvm_suld_3d_i8_trap: case Intrinsic::nvvm_suld_3d_v2i8_trap: - case Intrinsic::nvvm_suld_3d_v4i8_trap: { + case Intrinsic::nvvm_suld_3d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_i8_zero: + case Intrinsic::nvvm_suld_1d_v2i8_zero: + case Intrinsic::nvvm_suld_1d_v4i8_zero: + case Intrinsic::nvvm_suld_1d_array_i8_zero: + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + case Intrinsic::nvvm_suld_2d_i8_zero: + case Intrinsic::nvvm_suld_2d_v2i8_zero: + case Intrinsic::nvvm_suld_2d_v4i8_zero: + case Intrinsic::nvvm_suld_2d_array_i8_zero: + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + case Intrinsic::nvvm_suld_3d_i8_zero: + case Intrinsic::nvvm_suld_3d_v2i8_zero: + case Intrinsic::nvvm_suld_3d_v4i8_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i8; Info.ptrVal = nullptr; @@ -2494,6 +3537,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i16_clamp: + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_i16_clamp: + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_3d_i16_clamp: + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + case Intrinsic::nvvm_suld_3d_v4i16_clamp: case Intrinsic::nvvm_suld_1d_i16_trap: case Intrinsic::nvvm_suld_1d_v2i16_trap: case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2508,7 +3566,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i16_trap: case Intrinsic::nvvm_suld_3d_i16_trap: case Intrinsic::nvvm_suld_3d_v2i16_trap: - case Intrinsic::nvvm_suld_3d_v4i16_trap: { + case Intrinsic::nvvm_suld_3d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_i16_zero: + case Intrinsic::nvvm_suld_1d_v2i16_zero: + case Intrinsic::nvvm_suld_1d_v4i16_zero: + case Intrinsic::nvvm_suld_1d_array_i16_zero: + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + case Intrinsic::nvvm_suld_2d_i16_zero: + case Intrinsic::nvvm_suld_2d_v2i16_zero: + case Intrinsic::nvvm_suld_2d_v4i16_zero: + case Intrinsic::nvvm_suld_2d_array_i16_zero: + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + case Intrinsic::nvvm_suld_3d_i16_zero: + case Intrinsic::nvvm_suld_3d_v2i16_zero: + case Intrinsic::nvvm_suld_3d_v4i16_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i16; Info.ptrVal = nullptr; @@ -2519,6 +3592,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i32_clamp: + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_i32_clamp: + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_3d_i32_clamp: + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + case Intrinsic::nvvm_suld_3d_v4i32_clamp: case Intrinsic::nvvm_suld_1d_i32_trap: case Intrinsic::nvvm_suld_1d_v2i32_trap: case Intrinsic::nvvm_suld_1d_v4i32_trap: @@ -2533,7 +3621,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i32_trap: case Intrinsic::nvvm_suld_3d_i32_trap: case Intrinsic::nvvm_suld_3d_v2i32_trap: - case Intrinsic::nvvm_suld_3d_v4i32_trap: { + case Intrinsic::nvvm_suld_3d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_i32_zero: + case Intrinsic::nvvm_suld_1d_v2i32_zero: + case Intrinsic::nvvm_suld_1d_v4i32_zero: + case Intrinsic::nvvm_suld_1d_array_i32_zero: + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + case Intrinsic::nvvm_suld_2d_i32_zero: + case Intrinsic::nvvm_suld_2d_v2i32_zero: + case Intrinsic::nvvm_suld_2d_v4i32_zero: + case Intrinsic::nvvm_suld_2d_array_i32_zero: + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + case Intrinsic::nvvm_suld_3d_i32_zero: + case Intrinsic::nvvm_suld_3d_v2i32_zero: + case Intrinsic::nvvm_suld_3d_v4i32_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i32; Info.ptrVal = nullptr; @@ -2544,7 +3647,46 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - + case Intrinsic::nvvm_suld_1d_i64_clamp: + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_i64_clamp: + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_3d_i64_clamp: + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_i64_trap: + case Intrinsic::nvvm_suld_1d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_array_i64_trap: + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + case Intrinsic::nvvm_suld_2d_i64_trap: + case Intrinsic::nvvm_suld_2d_v2i64_trap: + case Intrinsic::nvvm_suld_2d_array_i64_trap: + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + case Intrinsic::nvvm_suld_3d_i64_trap: + case Intrinsic::nvvm_suld_3d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_i64_zero: + case Intrinsic::nvvm_suld_1d_v2i64_zero: + case Intrinsic::nvvm_suld_1d_array_i64_zero: + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + case Intrinsic::nvvm_suld_2d_i64_zero: + case Intrinsic::nvvm_suld_2d_v2i64_zero: + case Intrinsic::nvvm_suld_2d_array_i64_zero: + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + case Intrinsic::nvvm_suld_3d_i64_zero: + case Intrinsic::nvvm_suld_3d_v2i64_zero: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i64; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } } return false; } @@ -2648,7 +3790,31 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { // NVPTX DAG Combining //===----------------------------------------------------------------------===// -extern unsigned FMAContractLevel; +bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, + CodeGenOpt::Level OptLevel) const { + const Function *F = MF.getFunction(); + const TargetOptions &TO = MF.getTarget().Options; + + // Always honor command-line argument + if (FMAContractLevelOpt.getNumOccurrences() > 0) { + return FMAContractLevelOpt > 0; + } else if (OptLevel == 0) { + // Do not contract if we're not optimizing the code + return false; + } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { + // Honor TargetOptions flags that explicitly say fusion is okay + return true; + } else if (F->hasFnAttribute("unsafe-fp-math")) { + // Check for unsafe-fp-math=true coming from Clang + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + StringRef Val = Attr.getValueAsString(); + if (Val == "true") + return true; + } + + // We did not have a clear indication that fusion is allowed, so assume not + return false; +} /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is @@ -2682,7 +3848,9 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, } else if (N0.getOpcode() == ISD::FMUL) { if (VT == MVT::f32 || VT == MVT::f64) { - if (FMAContractLevel == 0) + const auto *TLI = static_cast( + &DAG.getTargetLoweringInfo()); + if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) return SDValue(); // For floating point: @@ -2867,13 +4035,13 @@ static bool IsMulWideOperandDemotable(SDValue Op, if (Op.getOpcode() == ISD::SIGN_EXTEND || Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Signed; return true; } } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Unsigned; return true; } @@ -3027,8 +4195,7 @@ static SDValue PerformSHLCombine(SDNode *N, SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { - // FIXME: Get this from the DAG somehow - CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive; + CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); switch (N->getOpcode()) { default: break; case ISD::ADD: @@ -3046,6 +4213,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + const DataLayout *TD, SmallVectorImpl &Results) { EVT ResVT = N->getValueType(0); SDLoc DL(N); @@ -3073,6 +4241,20 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, break; } + LoadSDNode *LD = cast(N); + + unsigned Align = LD->getAlignment(); + unsigned PrefAlign = + TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This load is not sufficiently aligned, so bail out and let this vector + // load be scalarized. Note that we may still be able to emit smaller + // vector loads. For example, if we are loading a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. + return; + } + EVT EltVT = ResVT.getVectorElementType(); unsigned NumElts = ResVT.getVectorNumElements(); @@ -3109,8 +4291,6 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); - LoadSDNode *LD = cast(N); - // The select routine does not have access to the LoadSDNode instance, so // pass along the extension information OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); @@ -3283,7 +4463,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( default: report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: - ReplaceLoadVector(N, DAG, Results); + ReplaceLoadVector(N, DAG, getDataLayout(), Results); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); @@ -3316,3 +4496,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfRangesSection; delete DwarfMacroInfoSection; } + +const MCSection * +NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + return getDataSection(); +} -- cgit v1.1