about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Target/R600/R600ControlFlowFinalizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r-- lib/Target/R600/R600ControlFlowFinalizer.cpp 66
1 file changed, 47 insertions, 19 deletions
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ab29d60..715be37 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -172,7 +172,7 @@ private:
}
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
- unsigned LiteralRegs[] = {
+ static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
@@ -256,6 +256,7 @@ private:
ClauseContent.push_back(MILit);
}
}
+ assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, ClauseContent);
}
@@ -276,6 +277,7 @@ private:
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
+ Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
@@ -345,6 +347,9 @@ public:
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
+ std::vector<MachineInstr *> LastAlu(1);
+ std::vector<MachineInstr *> ToPopAfter;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -355,6 +360,10 @@ public:
}
MachineBasicBlock::iterator MI = I;
+ if (MI->getOpcode() != AMDGPU::ENDIF)
+ LastAlu.back() = 0;
+ if (MI->getOpcode() == AMDGPU::CF_ALU)
+ LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
@@ -369,7 +378,10 @@ public:
case AMDGPU::R600_ExportBuf:
case AMDGPU::R600_ExportSwz:
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+ case AMDGPU::RAT_STORE_DWORD32_cm:
+ case AMDGPU::RAT_STORE_DWORD64_cm:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
@@ -400,6 +412,7 @@ public:
break;
}
case AMDGPU::IF_PREDICATE_SET: {
+ LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -417,7 +430,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
- .addImm(1);
+ .addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
@@ -426,31 +439,31 @@ public:
}
case AMDGPU::ENDIF: {
CurrentStack--;
+ if (LastAlu.back()) {
+ ToPopAfter.push_back(LastAlu.back());
+ } else {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ (void)MIb;
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CfCount++;
+ }
+
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount + 1);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CounterPropagateAddr(IfOrElseInst, CfCount);
+ IfOrElseInst->getOperand(1).setImm(1);
+ LastAlu.pop_back();
MI->eraseFromParent();
- CfCount++;
break;
}
- case AMDGPU::PREDICATED_BREAK: {
- CurrentStack--;
- CfCount += 3;
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
- .addImm(CfCount)
- .addImm(1);
+ case AMDGPU::BREAK: {
+ CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
- .addImm(CfCount)
- .addImm(1);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
break;
@@ -481,6 +494,21 @@ public:
break;
}
}
+ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+ MachineInstr *Alu = ToPopAfter[i];
+ BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+ TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ .addImm(Alu->getOperand(0).getImm())
+ .addImm(Alu->getOperand(1).getImm())
+ .addImm(Alu->getOperand(2).getImm())
+ .addImm(Alu->getOperand(3).getImm())
+ .addImm(Alu->getOperand(4).getImm())
+ .addImm(Alu->getOperand(5).getImm())
+ .addImm(Alu->getOperand(6).getImm())
+ .addImm(Alu->getOperand(7).getImm())
+ .addImm(Alu->getOperand(8).getImm());
+ Alu->eraseFromParent();
+ }
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}