diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-07-09 15:03:11 +0000 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-07-09 15:03:11 +0000 |
commit | c6f13db656c7649f933c74c4f90c09ff74de52a8 (patch) | |
tree | 58bfc64f2591a69c773c9a0872dffff0be097c1f /lib | |
parent | f79b9b859384fbbc065066e5978e39e09a1cc899 (diff) | |
download | external_llvm-c6f13db656c7649f933c74c4f90c09ff74de52a8.zip external_llvm-c6f13db656c7649f933c74c4f90c09ff74de52a8.tar.gz external_llvm-c6f13db656c7649f933c74c4f90c09ff74de52a8.tar.bz2 |
R600: Use DAG lowering pass to handle fcos/fsin
NOTE: This is a candidate for the stable branch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185940 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 39 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 32 |
4 files changed, 52 insertions, 23 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index d739a01..7f4468c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -121,6 +121,8 @@ enum { // End AMDIL ISD Opcodes DWORDADDR, FRACT, + COS_HW, + SIN_HW, FMAX, SMAX, UMAX, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index ce2aa92..4413734 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -26,7 +26,8 @@ using namespace llvm; R600TargetLowering::R600TargetLowering(TargetMachine &TM) : - AMDGPUTargetLowering(TM) { + AMDGPUTargetLowering(TM), + Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) { addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); @@ -38,6 +39,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::FDIV, MVT::v4f32, Expand); setOperationAction(ISD::FSUB, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Custom); + setOperationAction(ISD::FSIN, MVT::f32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); @@ -473,6 +477,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::FCOS: + case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); @@ -723,6 +729,37 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, } } +SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { + // On hw >= R700, COS/SIN input must be between -1. and 1. + // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) + EVT VT = Op.getValueType(); + SDValue Arg = Op.getOperand(0); + SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT, + DAG.getNode(ISD::FADD, SDLoc(Op), VT, + DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg, + DAG.getConstantFP(0.15915494309, MVT::f32)), + DAG.getConstantFP(0.5, MVT::f32))); + unsigned TrigNode; + switch (Op.getOpcode()) { + case ISD::FCOS: + TrigNode = AMDGPUISD::COS_HW; + break; + case ISD::FSIN: + TrigNode = AMDGPUISD::SIN_HW; + break; + default: + llvm_unreachable("Wrong trig opcode"); + } + SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT, + DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart, + DAG.getConstantFP(-0.5, MVT::f32))); + if (Gen >= AMDGPUSubtarget::R700) + return TrigVal; + // On R600 hw, COS/SIN input must be between -Pi and Pi. + return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal, + DAG.getConstantFP(3.14159265359, MVT::f32)); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index d4ba4c8..a033fcb 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -40,6 +40,7 @@ public: SmallVectorImpl<SDValue> &InVals) const; virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const; private: + unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the /// first nine dwords of a Vertex Buffer. These implicit parameters are /// lowered to load instructions which retreive the values from the Vertex @@ -60,6 +61,7 @@ private: SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index cb887d1..735dcfc 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -364,6 +364,14 @@ def DOT4 : SDNode<"AMDGPUISD::DOT4", [] >; +def COS_HW : SDNode<"AMDGPUISD::COS_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> +>; + +def SIN_HW : SDNode<"AMDGPUISD::SIN_HW", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]> +>; + def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; @@ -1080,14 +1088,14 @@ class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < } class SIN_Common <bits<11> inst> : R600_1OP < - inst, "SIN", []>{ + inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; let TransOnly = 1; let Itinerary = TransALU; } class COS_Common <bits<11> inst> : R600_1OP < - inst, "COS", []> { + inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> { let Trig = 1; let TransOnly = 1; let Itinerary = TransALU; @@ -1228,18 +1236,6 @@ let Predicates = [isR600] in { } -// Helper pattern for normalizing inputs to triginomic instructions for R700+ -// cards. -class COS_PAT <InstR600 trig> : Pat< - (fcos f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - -class SIN_PAT <InstR600 trig> : Pat< - (fsin f32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) ->; - //===----------------------------------------------------------------------===// // R700 Only instructions //===----------------------------------------------------------------------===// @@ -1247,10 +1243,6 @@ class SIN_PAT <InstR600 trig> : Pat< let Predicates = [isR700] in { def SIN_r700 : SIN_Common<0x6E>; def COS_r700 : COS_Common<0x6F>; - - // R700 normalizes inputs to SIN/COS the same as EG - def : SIN_PAT <SIN_r700>; - def : COS_PAT <COS_r700>; } //===----------------------------------------------------------------------===// @@ -1276,8 +1268,6 @@ def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; -def : SIN_PAT <SIN_eg>; -def : COS_PAT <COS_eg>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; //===----------------------------------------------------------------------===// @@ -1726,8 +1716,6 @@ def COS_cm : COS_Common<0x8E>; } // End isVector = 1 def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; -def : SIN_PAT <SIN_cm>; -def : COS_PAT <COS_cm>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; |