diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-06-03 17:40:18 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-06-03 17:40:18 +0000 |
commit | e5fcc0dee4b41658986047f346201ad98757e7d5 (patch) | |
tree | 26d19d26b6647328831f27829f35c796f0451c74 /lib | |
parent | e7397ee81ad07cab36362bab5a086f20acc60a80 (diff) | |
download | external_llvm-e5fcc0dee4b41658986047f346201ad98757e7d5.zip external_llvm-e5fcc0dee4b41658986047f346201ad98757e7d5.tar.gz external_llvm-e5fcc0dee4b41658986047f346201ad98757e7d5.tar.bz2 |
R600/SI: Add support for work item and work group intrinsics
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183138 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 94 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.h | 4 |
3 files changed, 88 insertions, 15 deletions
```diff
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index b6547ea..69a0ac9 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -33,8 +33,9 @@ protected:
   /// MachineFunction.
   ///
   /// \returns a RegisterSDNode representing Reg.
-  SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
-                               unsigned Reg, EVT VT) const;
+  virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
+                                       const TargetRegisterClass *RC,
+                                       unsigned Reg, EVT VT) const;
 
   bool isHWTrueValue(SDValue Op) const;
   bool isHWFalseValue(SDValue Op) const;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 65d5479..5dca0ca 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -76,6 +76,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::SETCC);
@@ -83,6 +85,23 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setSchedulingPreference(Sched::RegPressure);
 }
 
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
+                                         SDLoc DL, SDValue Chain,
+                                         unsigned Offset) const {
+  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+  PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+                                        AMDGPUAS::CONSTANT_ADDRESS);
+  EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
+  SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
+                        MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
+  SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+                            DAG.getConstant(Offset, MVT::i64));
+  return DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
+                        MachinePointerInfo(UndefValue::get(PtrTy)),
+                        VT, false, false, ArgVT.getSizeInBits() >> 3);
+
+}
+
 SDValue SITargetLowering::LowerFormalArguments(
                                       SDValue Chain,
                                       CallingConv::ID CallConv,
@@ -153,12 +172,11 @@ SDValue SITargetLowering::LowerFormalArguments(
     CCInfo.AllocateReg(AMDGPU::VGPR1);
   }
 
-  unsigned ArgReg = 0;
   // The pointer to the list of arguments is stored in SGPR0, SGPR1
   if (Info->ShaderType == ShaderType::COMPUTE) {
     CCInfo.AllocateReg(AMDGPU::SGPR0);
     CCInfo.AllocateReg(AMDGPU::SGPR1);
-    ArgReg = MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
+    MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
   }
 
   AnalyzeFormalArguments(CCInfo, Splits);
@@ -175,17 +193,10 @@ SDValue SITargetLowering::LowerFormalArguments(
     EVT VT = VA.getLocVT();
 
     if (VA.isMemLoc()) {
-      assert(ArgReg);
-      PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                            AMDGPUAS::CONSTANT_ADDRESS);
-      EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
-      SDValue BasePtr = DAG.getCopyFromReg(DAG.getRoot(), DL,
-                                           ArgReg, MVT::i64);
-      SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
-                                DAG.getConstant(VA.getLocMemOffset(), MVT::i64));
-      SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), Ptr,
-                                   MachinePointerInfo(UndefValue::get(PtrTy)),
-                                   VA.getValVT(), false, false, ArgVT.getSizeInBits() >> 3);
+      // The first 36 bytes of the input buffer contains information about
+      // thread group and global sizes.
+      SDValue Arg = LowerParameter(DAG, VT, DL, DAG.getRoot(),
+                                   36 + VA.getLocMemOffset());
       InVals.push_back(Arg);
       continue;
     }
@@ -293,6 +304,54 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntrinsicID =
+        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    EVT VT = Op.getValueType();
+    SDLoc DL(Op);
+    //XXX: Hardcoded we only use two to store the pointer to the parameters.
+    unsigned NumUserSGPRs = 2;
+    switch (IntrinsicID) {
+    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+    case Intrinsic::r600_read_ngroups_x:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 0);
+    case Intrinsic::r600_read_ngroups_y:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 4);
+    case Intrinsic::r600_read_ngroups_z:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 8);
+    case Intrinsic::r600_read_global_size_x:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 12);
+    case Intrinsic::r600_read_global_size_y:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 16);
+    case Intrinsic::r600_read_global_size_z:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 20);
+    case Intrinsic::r600_read_local_size_x:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 24);
+    case Intrinsic::r600_read_local_size_y:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 28);
+    case Intrinsic::r600_read_local_size_z:
+      return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 32);
+    case Intrinsic::r600_read_tgid_x:
+      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+          AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
+    case Intrinsic::r600_read_tgid_y:
+      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+          AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 1), VT);
+    case Intrinsic::r600_read_tgid_z:
+      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+          AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 2), VT);
+    case Intrinsic::r600_read_tidig_x:
+      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+                                  AMDGPU::VGPR0, VT);
+    case Intrinsic::r600_read_tidig_y:
+      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+                                  AMDGPU::VGPR1, VT);
+    case Intrinsic::r600_read_tidig_z:
+      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+                                  AMDGPU::VGPR2, VT);
+
+    }
+  }
   }
   return SDValue();
 }
@@ -933,3 +992,12 @@ MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
     }
   }
 }
+
+SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+                                               const TargetRegisterClass *RC,
+                                               unsigned Reg, EVT VT) const {
+  SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
+
+  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
+                            cast<RegisterSDNode>(VReg)->getReg(), VT);
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 5288a40..9b263b9 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -24,6 +24,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
   const SIInstrInfo * TII;
   const TargetRegisterInfo * TRI;
 
+  SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
+                         SDValue Chain, unsigned Offset) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -59,6 +61,8 @@ public:
                                              SDNode *Node) const;
 
   int32_t analyzeImmediate(const SDNode *N) const;
+  SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+                               unsigned Reg, EVT VT) const;
 };
 
 } // End namespace llvm
```