about summary refs log tree commit diff stats
path: root/lib/Target/R600/SILowerControlFlow.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r-- lib/Target/R600/SILowerControlFlow.cpp 17
1 files changed, 13 insertions, 4 deletions
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index c2e8f02..958763d 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
+ unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Vec;
- MachineInstr *MovRel =
+ MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(SubReg + Off)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
@@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
+ unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Dst;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(SubReg + Off, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
@@ -409,6 +415,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool NeedM0 = false;
@@ -476,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectSrc(MI);
break;
+ case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
@@ -487,6 +495,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
NeedWQM = true;
// Fall through
case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_ADD_U32_RTN:
NeedM0 = true;
break;
@@ -508,7 +517,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM) {
+ if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);