aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVincent Lejeune <vljn@ovi.com>2013-07-09 15:03:33 +0000
committerVincent Lejeune <vljn@ovi.com>2013-07-09 15:03:33 +0000
commitf2cfef8172fd2eceb036b8caff50623a189ba2ff (patch)
tree73a7c07a0a19f3084b05c7e2241302be420d59f0
parentf4bdec2ebeb1306a77e9377583c5799199775f88 (diff)
downloadexternal_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.zip
external_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.tar.gz
external_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.tar.bz2
R600: Do not predicated basic block with multiple alu clause
Test is not included as it is several 1000 lines long. To test this functionnality, a test case must generate at least 2 ALU clauses, where an ALU clause is ~110 instructions long. NOTE: This is a candidate for the stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185943 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp5
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp2
-rw-r--r--lib/Target/R600/R600EmitClauseMarkers.cpp14
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp45
-rw-r--r--lib/Target/R600/R600Instructions.td2
-rw-r--r--lib/Target/R600/R600Packetizer.cpp3
-rw-r--r--test/CodeGen/R600/jump-address.ll2
7 files changed, 65 insertions, 8 deletions
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 90f72de..7a14e50 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
}
bool AMDGPUPassConfig::addPreSched2() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createR600EmitClauseMarkers(*TM));
+ }
addPass(&IfConverterID);
return false;
}
@@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
- addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
addPass(createR600Packetizer(*TM));
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 887c808..932a6a7 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -256,6 +256,7 @@ private:
ClauseContent.push_back(MILit);
}
}
+ assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
return ClauseFile(ClauseHead, ClauseContent);
}
@@ -276,6 +277,7 @@ private:
void
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
unsigned &CfCount) {
+ Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index 0aea2d7..c1da64c 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public MachineFunctionPass {
private:
static char ID;
const R600InstrInfo *TII;
+ int Address;
unsigned OccupiedDwords(MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -159,7 +160,7 @@ private:
}
MachineBasicBlock::iterator
- MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
bool PushBeforeModifier = false;
@@ -199,20 +200,25 @@ private:
unsigned Opcode = PushBeforeModifier ?
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
- .addImm(0) // ADDR
+ // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
+ // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
+ // pass may assume that identical ALU clause starter at the beginning of a
+ // true and false branch can be factorized which is not the case.
+ .addImm(Address++) // ADDR
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
.addImm(KCacheBanks.empty()?0:2) // KM0
.addImm((KCacheBanks.size() < 2)?0:2) // KM1
.addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
.addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
- .addImm(AluInstCount); // COUNT
+ .addImm(AluInstCount) // COUNT
+ .addImm(1); // Enabled
return I;
}
public:
R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
- TII(0) { }
+ TII(0), Address(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 969a7ce..d0935fa 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
};
}
+static
+MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
+ It != E; ++It) {
+ if (It->getOpcode() == AMDGPU::CF_ALU ||
+ It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return llvm::prior(It.base());
+ }
+ return MBB.end();
+}
+
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
@@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 1;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
@@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ return 2;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
return 2;
}
}
@@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
+ MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+ if (CfAlu == MBB.end())
+ break;
+ assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(AMDGPU::CF_ALU));
break;
}
case AMDGPU::JUMP:
@@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
if (MI->getOpcode() == AMDGPU::KILLGT) {
return false;
+ } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
+ // If the clause start in the middle of MBB then the MBB has more
+ // than a single clause, unable to predicate several clauses.
+ if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
+ return false;
+ // TODO: We don't support KC merging atm
+ if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
+ return false;
+ return true;
} else if (isVector(*MI)) {
return false;
} else {
@@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
int PIdx = MI->findFirstPredOperandIdx();
+ if (MI->getOpcode() == AMDGPU::CF_ALU) {
+ MI->getOperand(8).setImm(0);
+ return true;
+ }
+
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 735dcfc..df5c438 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
-i32imm:$COUNT),
+i32imm:$COUNT, i32imm:$Enabled),
!strconcat(OpName, " $COUNT, @$ADDR, "
"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 5ee51fa..f4219bd 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
- if (MI->isKill()) {
+ if (MI->isKill() ||
+ (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);
diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
index ae9c8bb..9a5f1bc 100644
--- a/test/CodeGen/R600/jump-address.ll
+++ b/test/CodeGen/R600/jump-address.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: JUMP @3
+; CHECK: JUMP @7
; CHECK: EXPORT
; CHECK-NOT: EXPORT