aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Konig <christian.koenig@amd.com>2013-03-18 11:34:16 +0000
committerChristian Konig <christian.koenig@amd.com>2013-03-18 11:34:16 +0000
commitb9e8678025891a3eb5431c652df541b632902fc6 (patch)
treebe260cbb79959d236b32122c80bf7b2651861461
parent2d7f19e1e971a96186da15dc35ddb52b6163e36e (diff)
downloadexternal_llvm-b9e8678025891a3eb5431c652df541b632902fc6.zip
external_llvm-b9e8678025891a3eb5431c652df541b632902fc6.tar.gz
external_llvm-b9e8678025891a3eb5431c652df541b632902fc6.tar.bz2
R600/SI: implement indirect adressing for SI
Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177277 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SIISelLowering.cpp5
-rw-r--r--lib/Target/R600/SIInstructions.td69
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp117
3 files changed, 190 insertions, 1 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 7a1cd94..93f8c38 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 5a3c901..05b04a9 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1149,6 +1149,31 @@ def SI_KILL : InstSI <
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
@@ -1521,4 +1546,48 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+/********** ====================== **********/
+/********** Indirect adressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
} // End isSI predicate
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index b215aa2..9a027e7 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -66,6 +66,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
+ const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -84,9 +85,14 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveKill = false;
@@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_BRANCH:
Branch(MI);
break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
}
}
}