diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-06-03 17:40:11 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-06-03 17:40:11 +0000 |
commit | e7397ee81ad07cab36362bab5a086f20acc60a80 (patch) | |
tree | 754e9a6f488a46ed07acb9ec250af60e950d0b14 | |
parent | e86f9d70ca29429ea83bc2361cf908dc566783af (diff) | |
download | external_llvm-e7397ee81ad07cab36362bab5a086f20acc60a80.zip external_llvm-e7397ee81ad07cab36362bab5a086f20acc60a80.tar.gz external_llvm-e7397ee81ad07cab36362bab5a086f20acc60a80.tar.bz2 |
R600/SI: Add a calling convention for compute shaders
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183137 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/AMDGPUCallingConv.td | 18 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 28 | ||||
-rw-r--r-- | test/CodeGen/R600/bfi_int.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/lshl.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/lshr.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/mulhu.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/rotr.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/seto.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/setuo.ll | 2 |
10 files changed, 49 insertions, 19 deletions
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 9c30515..e57b5cd 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -32,17 +32,21 @@ def CC_SI : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 - ]>>>, + ]>>> + +]>; - // This is the default for i64 values. - // XXX: We should change this once clang understands the CC_AMDGPU. - CCIfType<[i64], CCAssignToRegWithShadow< - [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], - [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] - >> +// Calling convention for SI compute kernels +def CC_SI_Kernel : CallingConv<[ + CCIfType<[i64], CCAssignToStack <8, 4>>, + CCIfType<[i32, f32], CCAssignToStack <4, 4>>, + CCIfType<[i16], CCAssignToStack <2, 4>>, + CCIfType<[i8], CCAssignToStack <1, 4>> ]>; def CC_AMDGPU : CallingConv<[ + CCIf<"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"# + "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_SI_Kernel>>, CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"# "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>> ]>; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 4a6f4cc..02d6fab 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -14,9 +14,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPUISelLowering.h" +#include "AMDGPU.h" #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 825812f..65d5479 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -121,7 +121,7 @@ SDValue SITargetLowering::LowerFormalArguments( } // Second split vertices into their elements - if (Arg.VT.isVector()) { + if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) { ISD::InputArg NewArg = Arg; NewArg.Flags.setSplit(); NewArg.VT = Arg.VT.getVectorElementType(); @@ -153,6 +153,14 @@ SDValue SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(AMDGPU::VGPR1); } + unsigned ArgReg = 0; + // The pointer to the list of arguments is stored in SGPR0, SGPR1 + if (Info->ShaderType == ShaderType::COMPUTE) { + CCInfo.AllocateReg(AMDGPU::SGPR0); + CCInfo.AllocateReg(AMDGPU::SGPR1); + ArgReg = MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass); + } + AnalyzeFormalArguments(CCInfo, Splits); for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { @@ -164,10 +172,26 @@ SDValue SITargetLowering::LowerFormalArguments( } CCValAssign &VA = ArgLocs[ArgIdx++]; + EVT VT = VA.getLocVT(); + + if (VA.isMemLoc()) { + assert(ArgReg); + PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits()); + SDValue BasePtr = DAG.getCopyFromReg(DAG.getRoot(), DL, + ArgReg, MVT::i64); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, + DAG.getConstant(VA.getLocMemOffset(), MVT::i64)); + SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), Ptr, + MachinePointerInfo(UndefValue::get(PtrTy)), + VA.getValVT(), false, false, ArgVT.getSizeInBits() >> 3); + InVals.push_back(Arg); + continue; + } assert(VA.isRegLoc() && "Parameter must be in a register!"); unsigned Reg = VA.getLocReg(); - MVT VT = VA.getLocVT(); if (VT == MVT::i64) { // For now assume it is a pointer diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index 707abb3..a1bd09a 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -38,8 +38,8 @@ entry: ; R600-CHECK: @bfi_sha256_ma ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}} -; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}} +; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} +; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index fb698da..9e29b0d 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 +;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]+}}, {{[SV]GPR[0-9]+}}, 1 define void @test(i32 %p) { %i = mul i32 %p, 2 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index e0ed3ac..eab3fbf 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 +;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]+}}, {{[SV]GPR[0-9]+}}, 1 define void @test(i32 %p) { %i = udiv i32 %p, 2 diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index bc17a59..eb379d1 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 -;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 +;CHECK: V_MOV_B32_e32 VGPR{{[0-9]+}}, -1431655765 +;CHECK: V_MUL_HI_U32 VGPR0, {{[SV]GPR[0-9]+}}, {{VGPR[0-9]+}} ;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 define void @test(i32 %p) { diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll index efbdcbf..960d30d 100644 --- a/test/CodeGen/R600/rotr.ll +++ b/test/CodeGen/R600/rotr.ll @@ -22,8 +22,8 @@ entry: ; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}} ; SI-CHECK: @rotl -; SI-CHECK: V_SUB_I32_e32 [[DST:VGPR[0-9]+]], 32, {{VGPR[0-9]+}} -; SI-CHECK: V_ALIGNBIT_B32 {{VGPR[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}, [[DST]] +; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}} +; SI-CHECK: V_ALIGNBIT_B32 {{VGPR[0-9]+, [SV]GPR[0-9]+, VGPR[0-9]+}}, [[DST]] define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: %0 = shl i32 %x, %y diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index 4622203..19716f8 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 +;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0 define void @main(float %p) { main_body: diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll index 0bf5801..929dbb1 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 +;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}, 0, 0, 0, 0 define void @main(float %p) { main_body: |