about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Michel Danzer <michel.daenzer@amd.com> 2013-07-10 16:36:43 +0000
committer: Michel Danzer <michel.daenzer@amd.com> 2013-07-10 16:36:43 +0000
commit: 7740daa8ba053294b7448556c049cf6778711d66 (patch)
tree: ccbd3219bd492392ef21fc62e01406d55ae66c5a
parent: 0a9aaacd7298e4108eeecef7bad7933ae7cf36c4 (diff)
download: external_llvm-7740daa8ba053294b7448556c049cf6778711d66.zip
external_llvm-7740daa8ba053294b7448556c049cf6778711d66.tar.gz
external_llvm-7740daa8ba053294b7448556c049cf6778711d66.tar.bz2
R600/SI: Initial support for LDS/GDS instructions
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186009 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/R600/SIInsertWaits.cpp | 2
-rw-r--r-- lib/Target/R600/SIInstrFormats.td | 24
-rw-r--r-- lib/Target/R600/SIInstrInfo.td | 23
-rw-r--r-- lib/Target/R600/SIInstructions.td | 3
-rw-r--r-- lib/Target/R600/SILowerControlFlow.cpp | 16
5 files changed, 68 insertions, 0 deletions
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index c36e1dc..d31da45 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -134,6 +134,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
if (TSFlags & SIInstrFlags::LGKM_CNT) {
MachineOperand &Op = MI.getOperand(0);
+ if (!Op.isReg())
+ Op = MI.getOperand(1);
assert(Op.isReg() && "First LGKM operand must be a register!");
unsigned Reg = Op.getReg();
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 51f323d..434aa7e 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -281,6 +281,30 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Uses = [EXEC] in {
+class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+ // Operand fields; each one is copied into a fixed slice of Inst below.
+ bits<8> vdst;
+ bits<1> gds;
+ bits<8> addr;
+ bits<8> data0;
+ bits<8> data1;
+ bits<8> offset0;
+ bits<8> offset1;
+ // Bit layout of the instruction word. Inst{16} is not assigned in this class.
+ let Inst{7-0} = offset0;
+ let Inst{15-8} = offset1;
+ let Inst{17} = gds;
+ let Inst{25-18} = op; // opcode taken from the class parameter
+ let Inst{31-26} = 0x36; // fixed value shared by every DS instruction (encoding)
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data0;
+ let Inst{55-48} = data1;
+ let Inst{63-56} = vdst;
+ // NOTE(review): presumably the flag SIInsertWaits tests as SIInstrFlags::LGKM_CNT - confirm.
+ let LGKM_CNT = 1;
+}
+
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 36812ca..655a8b1 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -287,6 +287,29 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
// Vector I/O classes
//===----------------------------------------------------------------------===//
+class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
+ op, // DS variant that produces a $vdst result and is marked mayLoad
+ (outs regClass:$vdst), // single result; register class chosen by the instantiation
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
+ i8imm:$offset0, i8imm:$offset1),
+ asm#" $vdst, $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
+ []> { // empty pattern list
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
+ op, // DS variant with no result, marked mayStore
+ (outs), // no outputs
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
+ i8imm:$offset0, i8imm:$offset1),
+ asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
+ []> { // empty pattern list; regClass is not referenced anywhere in this class
+ let mayStore = 1;
+ let mayLoad = 0;
+ let vdst = 0; // there is no $vdst operand here, so the field is given a fixed 0
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index c9eac7d..09460d8 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -391,6 +391,9 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End isCompare = 1
+def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; // op 0x0d, store helper
+def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; // op 0x36, load helper, VReg_32 result
+
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 5b434fb..c2e8f02 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -411,6 +411,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
bool HaveKill = false;
+ bool NeedM0 = false;
bool NeedWQM = false;
unsigned Depth = 0;
@@ -482,6 +483,13 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectDst(MI);
break;
+ case AMDGPU::DS_READ_B32:
+ NeedWQM = true;
+ // Fall through
+ case AMDGPU::DS_WRITE_B32:
+ NeedM0 = true;
+ break;
+
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
@@ -492,6 +500,14 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (NeedM0) {
+ MachineBasicBlock &MBB = MF.front();
+ // Initialize M0 to a value that won't cause LDS access to be discarded
+ // due to offset clamping
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+ AMDGPU::M0).addImm(0xffffffff);
+ }
+
if (NeedWQM) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),