diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-07-09 15:03:33 +0000 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-07-09 15:03:33 +0000 |
commit | f2cfef8172fd2eceb036b8caff50623a189ba2ff (patch) | |
tree | 73a7c07a0a19f3084b05c7e2241302be420d59f0 | |
parent | f4bdec2ebeb1306a77e9377583c5799199775f88 (diff) | |
download | external_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.zip external_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.tar.gz external_llvm-f2cfef8172fd2eceb036b8caff50623a189ba2ff.tar.bz2 |
R600: Do not predicate basic blocks with multiple ALU clauses
Test is not included as it is several 1000 lines long.
To test this functionality, a test case must generate at least 2 ALU clauses,
where an ALU clause is ~110 instructions long.
NOTE: This is a candidate for the stable branch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185943 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 5 | ||||
-rw-r--r-- | lib/Target/R600/R600ControlFlowFinalizer.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600EmitClauseMarkers.cpp | 14 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 45 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 2 | ||||
-rw-r--r-- | lib/Target/R600/R600Packetizer.cpp | 3 | ||||
-rw-r--r-- | test/CodeGen/R600/jump-address.ll | 2 |
7 files changed, 65 insertions, 8 deletions
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 90f72de..7a14e50 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() { } bool AMDGPUPassConfig::addPreSched2() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { + addPass(createR600EmitClauseMarkers(*TM)); + } addPass(&IfConverterID); return false; } @@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); - addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); addPass(createR600Packetizer(*TM)); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 887c808..932a6a7 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -256,6 +256,7 @@ private: ClauseContent.push_back(MILit); } } + assert(ClauseContent.size() < 128 && "ALU clause is too big"); ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); return ClauseFile(ClauseHead, ClauseContent); } @@ -276,6 +277,7 @@ private: void EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, unsigned &CfCount) { + Clause.first->getOperand(0).setImm(0); CounterPropagateAddr(Clause.first, CfCount); MachineBasicBlock *BB = Clause.first->getParent(); BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index 0aea2d7..c1da64c 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -32,6 +32,7 @@ class 
R600EmitClauseMarkersPass : public MachineFunctionPass { private: static char ID; const R600InstrInfo *TII; + int Address; unsigned OccupiedDwords(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -159,7 +160,7 @@ private: } MachineBasicBlock::iterator - MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator ClauseHead = I; std::vector<std::pair<unsigned, unsigned> > KCacheBanks; bool PushBeforeModifier = false; @@ -199,20 +200,25 @@ private: unsigned Opcode = PushBeforeModifier ? AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) - .addImm(0) // ADDR + // We don't use the ADDR field until R600ControlFlowFinalizer pass, where + // it is safe to assume it is 0. However if we always put 0 here, the ifcvt + // pass may assume that identical ALU clause starter at the beginning of a + // true and false branch can be factorized which is not the case. 
+ .addImm(Address++) // ADDR .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 .addImm(KCacheBanks.empty()?0:2) // KM0 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 - .addImm(AluInstCount); // COUNT + .addImm(AluInstCount) // COUNT + .addImm(1); // Enabled return I; } public: R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0) { } + TII(0), Address(0) { } virtual bool runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 969a7ce..d0935fa 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { }; } +static +MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { + for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); + It != E; ++It) { + if (It->getOpcode() == AMDGPU::CF_ALU || + It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) + return llvm::prior(It.base()); + } + return MBB.end(); +} + unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, @@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + return 1; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); return 1; } } else { @@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); BuildMI(&MBB, DL, 
get(AMDGPU::JUMP)).addMBB(FBB); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + return 2; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); return 2; } } @@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + break; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(AMDGPU::CF_ALU)); break; } case AMDGPU::JUMP: @@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); clearFlag(predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); + if (CfAlu == MBB.end()) + break; + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); + CfAlu->setDesc(get(AMDGPU::CF_ALU)); break; } case AMDGPU::JUMP: @@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const { if (MI->getOpcode() == AMDGPU::KILLGT) { return false; + } else if (MI->getOpcode() == AMDGPU::CF_ALU) { + // If the clause start in the middle of MBB then the MBB has more + // than a single clause, unable to predicate several clauses. 
+ if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI)) + return false; + // TODO: We don't support KC merging atm + if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0) + return false; + return true; } else if (isVector(*MI)) { return false; } else { @@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { int PIdx = MI->findFirstPredOperandIdx(); + if (MI->getOpcode() == AMDGPU::CF_ALU) { + MI->getOperand(8).setImm(0); + return true; + } + if (PIdx != -1) { MachineOperand &PMO = MI->getOperand(PIdx); PMO.setReg(Pred[2].getReg()); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 735dcfc..df5c438 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, -i32imm:$COUNT), +i32imm:$COUNT, i32imm:$Enabled), !strconcat(OpName, " $COUNT, @$ADDR, " "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), [] >, CF_ALU_WORD0, CF_ALU_WORD1 { diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 5ee51fa..f4219bd 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator End = MBB->end(); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != End) { - if (MI->isKill()) { + if (MI->isKill() || + (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { MachineBasicBlock::iterator DeleteMI = MI; ++MI; MBB->erase(DeleteMI); diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll index ae9c8bb..9a5f1bc 100644 --- a/test/CodeGen/R600/jump-address.ll +++ 
b/test/CodeGen/R600/jump-address.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: JUMP @3 +; CHECK: JUMP @7 ; CHECK: EXPORT ; CHECK-NOT: EXPORT |