diff options
Diffstat (limited to 'lib/Target/NVPTX/NVPTXISelLowering.h')
-rw-r--r-- | lib/Target/NVPTX/NVPTXISelLowering.h | 324 |
1 files changed, 293 insertions, 31 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 7b4026d..d66d81a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXISELLOWERING_H -#define NVPTXISELLOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H #include "NVPTX.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -77,54 +77,244 @@ enum NodeType { StoreRetvalV4, // Texture intrinsics - Tex1DFloatI32, + Tex1DFloatS32, Tex1DFloatFloat, Tex1DFloatFloatLevel, Tex1DFloatFloatGrad, - Tex1DI32I32, - Tex1DI32Float, - Tex1DI32FloatLevel, - Tex1DI32FloatGrad, - Tex1DArrayFloatI32, + Tex1DS32S32, + Tex1DS32Float, + Tex1DS32FloatLevel, + Tex1DS32FloatGrad, + Tex1DU32S32, + Tex1DU32Float, + Tex1DU32FloatLevel, + Tex1DU32FloatGrad, + Tex1DArrayFloatS32, Tex1DArrayFloatFloat, Tex1DArrayFloatFloatLevel, Tex1DArrayFloatFloatGrad, - Tex1DArrayI32I32, - Tex1DArrayI32Float, - Tex1DArrayI32FloatLevel, - Tex1DArrayI32FloatGrad, - Tex2DFloatI32, + Tex1DArrayS32S32, + Tex1DArrayS32Float, + Tex1DArrayS32FloatLevel, + Tex1DArrayS32FloatGrad, + Tex1DArrayU32S32, + Tex1DArrayU32Float, + Tex1DArrayU32FloatLevel, + Tex1DArrayU32FloatGrad, + Tex2DFloatS32, Tex2DFloatFloat, Tex2DFloatFloatLevel, Tex2DFloatFloatGrad, - Tex2DI32I32, - Tex2DI32Float, - Tex2DI32FloatLevel, - Tex2DI32FloatGrad, - Tex2DArrayFloatI32, + Tex2DS32S32, + Tex2DS32Float, + Tex2DS32FloatLevel, + Tex2DS32FloatGrad, + Tex2DU32S32, + Tex2DU32Float, + Tex2DU32FloatLevel, + Tex2DU32FloatGrad, + Tex2DArrayFloatS32, Tex2DArrayFloatFloat, Tex2DArrayFloatFloatLevel, Tex2DArrayFloatFloatGrad, - Tex2DArrayI32I32, - Tex2DArrayI32Float, - Tex2DArrayI32FloatLevel, - Tex2DArrayI32FloatGrad, - Tex3DFloatI32, + Tex2DArrayS32S32, + Tex2DArrayS32Float, + Tex2DArrayS32FloatLevel, + Tex2DArrayS32FloatGrad, + Tex2DArrayU32S32, + Tex2DArrayU32Float, + Tex2DArrayU32FloatLevel, + Tex2DArrayU32FloatGrad, + Tex3DFloatS32, Tex3DFloatFloat, Tex3DFloatFloatLevel, Tex3DFloatFloatGrad, - Tex3DI32I32, - Tex3DI32Float, - Tex3DI32FloatLevel, - Tex3DI32FloatGrad, + Tex3DS32S32, + Tex3DS32Float, + Tex3DS32FloatLevel, + Tex3DS32FloatGrad, + Tex3DU32S32, + Tex3DU32Float, + Tex3DU32FloatLevel, + Tex3DU32FloatGrad, + TexCubeFloatFloat, + TexCubeFloatFloatLevel, + TexCubeS32Float, + TexCubeS32FloatLevel, + TexCubeU32Float, + TexCubeU32FloatLevel, + TexCubeArrayFloatFloat, + TexCubeArrayFloatFloatLevel, + TexCubeArrayS32Float, + TexCubeArrayS32FloatLevel, + TexCubeArrayU32Float, + TexCubeArrayU32FloatLevel, + Tld4R2DFloatFloat, + Tld4G2DFloatFloat, + Tld4B2DFloatFloat, + Tld4A2DFloatFloat, + Tld4R2DS64Float, + Tld4G2DS64Float, + Tld4B2DS64Float, + Tld4A2DS64Float, + Tld4R2DU64Float, + Tld4G2DU64Float, + Tld4B2DU64Float, + Tld4A2DU64Float, + TexUnified1DFloatS32, + TexUnified1DFloatFloat, + TexUnified1DFloatFloatLevel, + TexUnified1DFloatFloatGrad, + TexUnified1DS32S32, + TexUnified1DS32Float, + TexUnified1DS32FloatLevel, + TexUnified1DS32FloatGrad, + TexUnified1DU32S32, + TexUnified1DU32Float, + TexUnified1DU32FloatLevel, + TexUnified1DU32FloatGrad, + TexUnified1DArrayFloatS32, + TexUnified1DArrayFloatFloat, + TexUnified1DArrayFloatFloatLevel, + TexUnified1DArrayFloatFloatGrad, + TexUnified1DArrayS32S32, + TexUnified1DArrayS32Float, + TexUnified1DArrayS32FloatLevel, + TexUnified1DArrayS32FloatGrad, + TexUnified1DArrayU32S32, + TexUnified1DArrayU32Float, + TexUnified1DArrayU32FloatLevel, + TexUnified1DArrayU32FloatGrad, + TexUnified2DFloatS32, + TexUnified2DFloatFloat, + TexUnified2DFloatFloatLevel, + TexUnified2DFloatFloatGrad, + TexUnified2DS32S32, + TexUnified2DS32Float, + TexUnified2DS32FloatLevel, + TexUnified2DS32FloatGrad, + TexUnified2DU32S32, + TexUnified2DU32Float, + TexUnified2DU32FloatLevel, + TexUnified2DU32FloatGrad, + TexUnified2DArrayFloatS32, + TexUnified2DArrayFloatFloat, + TexUnified2DArrayFloatFloatLevel, + TexUnified2DArrayFloatFloatGrad, + TexUnified2DArrayS32S32, + TexUnified2DArrayS32Float, + TexUnified2DArrayS32FloatLevel, + TexUnified2DArrayS32FloatGrad, + TexUnified2DArrayU32S32, + TexUnified2DArrayU32Float, + TexUnified2DArrayU32FloatLevel, + TexUnified2DArrayU32FloatGrad, + TexUnified3DFloatS32, + TexUnified3DFloatFloat, + TexUnified3DFloatFloatLevel, + TexUnified3DFloatFloatGrad, + TexUnified3DS32S32, + TexUnified3DS32Float, + TexUnified3DS32FloatLevel, + TexUnified3DS32FloatGrad, + TexUnified3DU32S32, + TexUnified3DU32Float, + TexUnified3DU32FloatLevel, + TexUnified3DU32FloatGrad, + TexUnifiedCubeFloatFloat, + TexUnifiedCubeFloatFloatLevel, + TexUnifiedCubeS32Float, + TexUnifiedCubeS32FloatLevel, + TexUnifiedCubeU32Float, + TexUnifiedCubeU32FloatLevel, + TexUnifiedCubeArrayFloatFloat, + TexUnifiedCubeArrayFloatFloatLevel, + TexUnifiedCubeArrayS32Float, + TexUnifiedCubeArrayS32FloatLevel, + TexUnifiedCubeArrayU32Float, + TexUnifiedCubeArrayU32FloatLevel, + Tld4UnifiedR2DFloatFloat, + Tld4UnifiedG2DFloatFloat, + Tld4UnifiedB2DFloatFloat, + Tld4UnifiedA2DFloatFloat, + Tld4UnifiedR2DS64Float, + Tld4UnifiedG2DS64Float, + Tld4UnifiedB2DS64Float, + Tld4UnifiedA2DS64Float, + Tld4UnifiedR2DU64Float, + Tld4UnifiedG2DU64Float, + Tld4UnifiedB2DU64Float, + Tld4UnifiedA2DU64Float, // Surface intrinsics + Suld1DI8Clamp, + Suld1DI16Clamp, + Suld1DI32Clamp, + Suld1DI64Clamp, + Suld1DV2I8Clamp, + Suld1DV2I16Clamp, + Suld1DV2I32Clamp, + Suld1DV2I64Clamp, + Suld1DV4I8Clamp, + Suld1DV4I16Clamp, + Suld1DV4I32Clamp, + + Suld1DArrayI8Clamp, + Suld1DArrayI16Clamp, + Suld1DArrayI32Clamp, + Suld1DArrayI64Clamp, + Suld1DArrayV2I8Clamp, + Suld1DArrayV2I16Clamp, + Suld1DArrayV2I32Clamp, + Suld1DArrayV2I64Clamp, + Suld1DArrayV4I8Clamp, + Suld1DArrayV4I16Clamp, + Suld1DArrayV4I32Clamp, + + Suld2DI8Clamp, + Suld2DI16Clamp, + Suld2DI32Clamp, + Suld2DI64Clamp, + Suld2DV2I8Clamp, + Suld2DV2I16Clamp, + Suld2DV2I32Clamp, + Suld2DV2I64Clamp, + Suld2DV4I8Clamp, + Suld2DV4I16Clamp, + Suld2DV4I32Clamp, + + Suld2DArrayI8Clamp, + Suld2DArrayI16Clamp, + Suld2DArrayI32Clamp, + Suld2DArrayI64Clamp, + Suld2DArrayV2I8Clamp, + Suld2DArrayV2I16Clamp, + Suld2DArrayV2I32Clamp, + Suld2DArrayV2I64Clamp, + Suld2DArrayV4I8Clamp, + Suld2DArrayV4I16Clamp, + Suld2DArrayV4I32Clamp, + + Suld3DI8Clamp, + Suld3DI16Clamp, + Suld3DI32Clamp, + Suld3DI64Clamp, + Suld3DV2I8Clamp, + Suld3DV2I16Clamp, + Suld3DV2I32Clamp, + Suld3DV2I64Clamp, + Suld3DV4I8Clamp, + Suld3DV4I16Clamp, + Suld3DV4I32Clamp, + Suld1DI8Trap, Suld1DI16Trap, Suld1DI32Trap, + Suld1DI64Trap, Suld1DV2I8Trap, Suld1DV2I16Trap, Suld1DV2I32Trap, + Suld1DV2I64Trap, Suld1DV4I8Trap, Suld1DV4I16Trap, Suld1DV4I32Trap, @@ -132,9 +322,11 @@ enum NodeType { Suld1DArrayI8Trap, Suld1DArrayI16Trap, Suld1DArrayI32Trap, + Suld1DArrayI64Trap, Suld1DArrayV2I8Trap, Suld1DArrayV2I16Trap, Suld1DArrayV2I32Trap, + Suld1DArrayV2I64Trap, Suld1DArrayV4I8Trap, Suld1DArrayV4I16Trap, Suld1DArrayV4I32Trap, @@ -142,9 +334,11 @@ enum NodeType { Suld2DI8Trap, Suld2DI16Trap, Suld2DI32Trap, + Suld2DI64Trap, Suld2DV2I8Trap, Suld2DV2I16Trap, Suld2DV2I32Trap, + Suld2DV2I64Trap, Suld2DV4I8Trap, Suld2DV4I16Trap, Suld2DV4I32Trap, @@ -152,9 +346,11 @@ enum NodeType { Suld2DArrayI8Trap, Suld2DArrayI16Trap, Suld2DArrayI32Trap, + Suld2DArrayI64Trap, Suld2DArrayV2I8Trap, Suld2DArrayV2I16Trap, Suld2DArrayV2I32Trap, + Suld2DArrayV2I64Trap, Suld2DArrayV4I8Trap, Suld2DArrayV4I16Trap, Suld2DArrayV4I32Trap, @@ -162,12 +358,74 @@ enum NodeType { Suld3DI8Trap, Suld3DI16Trap, Suld3DI32Trap, + Suld3DI64Trap, Suld3DV2I8Trap, Suld3DV2I16Trap, Suld3DV2I32Trap, + Suld3DV2I64Trap, Suld3DV4I8Trap, Suld3DV4I16Trap, - Suld3DV4I32Trap + Suld3DV4I32Trap, + + Suld1DI8Zero, + Suld1DI16Zero, + Suld1DI32Zero, + Suld1DI64Zero, + Suld1DV2I8Zero, + Suld1DV2I16Zero, + Suld1DV2I32Zero, + Suld1DV2I64Zero, + Suld1DV4I8Zero, + Suld1DV4I16Zero, + Suld1DV4I32Zero, + + Suld1DArrayI8Zero, + Suld1DArrayI16Zero, + Suld1DArrayI32Zero, + Suld1DArrayI64Zero, + Suld1DArrayV2I8Zero, + Suld1DArrayV2I16Zero, + Suld1DArrayV2I32Zero, + Suld1DArrayV2I64Zero, + Suld1DArrayV4I8Zero, + Suld1DArrayV4I16Zero, + Suld1DArrayV4I32Zero, + + Suld2DI8Zero, + Suld2DI16Zero, + Suld2DI32Zero, + Suld2DI64Zero, + Suld2DV2I8Zero, + Suld2DV2I16Zero, + Suld2DV2I32Zero, + Suld2DV2I64Zero, + Suld2DV4I8Zero, + Suld2DV4I16Zero, + Suld2DV4I32Zero, + + Suld2DArrayI8Zero, + Suld2DArrayI16Zero, + Suld2DArrayI32Zero, + Suld2DArrayI64Zero, + Suld2DArrayV2I8Zero, + Suld2DArrayV2I16Zero, + Suld2DArrayV2I32Zero, + Suld2DArrayV2I64Zero, + Suld2DArrayV4I8Zero, + Suld2DArrayV4I16Zero, + Suld2DArrayV4I32Zero, + + Suld3DI8Zero, + Suld3DI16Zero, + Suld3DI32Zero, + Suld3DI64Zero, + Suld3DV2I8Zero, + Suld3DV2I16Zero, + Suld3DV2I32Zero, + Suld3DV2I64Zero, + Suld3DV4I8Zero, + Suld3DV4I16Zero, + Suld3DV4I32Zero }; } @@ -178,7 +436,7 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -237,7 +495,7 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - NVPTXTargetMachine *nvTM; + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } @@ -245,6 +503,10 @@ public: TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; + + bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -274,4 +536,4 @@ private: }; } // namespace llvm -#endif // NVPTXISELLOWERING_H +#endif |