diff options
author | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
commit | 1878f9a7874b1ff569d745c0269f49d3daf7203d (patch) | |
tree | 19a8dbaaedf6a056c617e87596b32d3f452af137 /lib/Target/R600/R600ControlFlowFinalizer.cpp | |
parent | 7a57f27b857ec4b243d83d392a399f02fc196c0a (diff) | |
parent | 100fbdd06be7590b23c4707a98cd605bdb519498 (diff) | |
download | external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.zip external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.gz external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.bz2 |
Merge commit '100fbdd06be7590b23c4707a98cd605bdb519498' into merge_20130612
Diffstat (limited to 'lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r-- | lib/Target/R600/R600ControlFlowFinalizer.cpp | 92 |
1 files changed, 49 insertions, 43 deletions
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 0995795..ab29d60 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -14,8 +14,6 @@ #define DEBUG_TYPE "r600cf" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "AMDGPU.h" #include "R600Defines.h" #include "R600InstrInfo.h" @@ -24,8 +22,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; -namespace llvm { +namespace { class R600ControlFlowFinalizer : public MachineFunctionPass { @@ -48,7 +49,7 @@ private: static char ID; const R600InstrInfo *TII; - const R600RegisterInfo &TRI; + const R600RegisterInfo *TRI; unsigned MaxFetchInst; const AMDGPUSubtarget &ST; @@ -64,7 +65,7 @@ private: const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { unsigned Opcode = 0; - bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX); + bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN); switch (CFI) { case CF_TC: Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; @@ -97,7 +98,7 @@ private: Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; break; case CF_END: - if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) { + if (ST.hasCaymanISA()) { Opcode = AMDGPU::CF_END_CM; break; } @@ -109,28 +110,33 @@ private: } bool isCompatibleWithClause(const MachineInstr *MI, - std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const { + std::set<unsigned> &DstRegs) const { unsigned DstMI, SrcMI; for (MachineInstr::const_mop_iterator I = MI->operands_begin(), E = MI->operands_end(); I != E; ++I) { const MachineOperand &MO = *I; if (!MO.isReg()) continue; - if (MO.isDef()) - DstMI = MO.getReg(); + if (MO.isDef()) { + unsigned Reg = MO.getReg(); + if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + DstMI = Reg; + else + DstMI = TRI->getMatchingSuperReg(Reg, + TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), + &AMDGPU::R600_Reg128RegClass); + } if (MO.isUse()) { unsigned Reg = MO.getReg(); if (AMDGPU::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else - SrcMI = TRI.getMatchingSuperReg(Reg, - TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)), + SrcMI = TRI->getMatchingSuperReg(Reg, + TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)), &AMDGPU::R600_Reg128RegClass); } } - if ((DstRegs.find(SrcMI) == DstRegs.end()) && - (SrcRegs.find(DstMI) == SrcRegs.end())) { - SrcRegs.insert(SrcMI); + if ((DstRegs.find(SrcMI) == DstRegs.end())) { DstRegs.insert(DstMI); return true; } else @@ -144,16 +150,16 @@ private: std::vector<MachineInstr *> ClauseContent; unsigned AluInstCount = 0; bool IsTex = TII->usesTextureCache(ClauseHead); - std::set<unsigned> DstRegs, SrcRegs; + std::set<unsigned> DstRegs; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; - if (AluInstCount > MaxFetchInst) + if (AluInstCount >= MaxFetchInst) break; if ((IsTex && !TII->usesTextureCache(I)) || (!IsTex && !TII->usesVertexCache(I))) break; - if (!isCompatibleWithClause(I, DstRegs, SrcRegs)) + if (!isCompatibleWithClause(I, DstRegs)) break; AluInstCount ++; ClauseContent.push_back(I); @@ -165,29 +171,27 @@ private: return ClauseFile(MIb, ClauseContent); } - void getLiteral(MachineInstr *MI, std::vector<unsigned> &Lits) const { + void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const { unsigned LiteralRegs[] = { AMDGPU::ALU_LITERAL_X, AMDGPU::ALU_LITERAL_Y, AMDGPU::ALU_LITERAL_Z, AMDGPU::ALU_LITERAL_W }; - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) + const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs = + TII->getSrcs(MI); + for (unsigned i = 0, e = Srcs.size(); i < e; ++i) { + if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X) continue; - if (MO.getReg() != AMDGPU::ALU_LITERAL_X) - continue; - unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); - int64_t Imm = MI->getOperand(ImmIdx).getImm(); - std::vector<unsigned>::iterator It = + int64_t Imm = Srcs[i].second; + std::vector<int64_t>::iterator It = std::find(Lits.begin(), Lits.end(), Imm); if (It != Lits.end()) { unsigned Index = It - Lits.begin(); - MO.setReg(LiteralRegs[Index]); + Srcs[i].first->setReg(LiteralRegs[Index]); } else { assert(Lits.size() < 4 && "Too many literals in Instruction Group"); - MO.setReg(LiteralRegs[Lits.size()]); + Srcs[i].first->setReg(LiteralRegs[Lits.size()]); Lits.push_back(Imm); } } @@ -221,7 +225,7 @@ private: } if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) break; - std::vector<unsigned> Literals; + std::vector<int64_t> Literals; if (I->isBundle()) { MachineInstr *DeleteMI = I; MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); @@ -295,37 +299,38 @@ private: } unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const { - switch (ST.device()->getGeneration()) { - case AMDGPUDeviceInfo::HD4XXX: + switch (ST.getGeneration()) { + case AMDGPUSubtarget::R600: + case AMDGPUSubtarget::R700: if (hasPush) StackSubEntry += 2; break; - case AMDGPUDeviceInfo::HD5XXX: + case AMDGPUSubtarget::EVERGREEN: if (hasPush) StackSubEntry ++; - case AMDGPUDeviceInfo::HD6XXX: + case AMDGPUSubtarget::NORTHERN_ISLANDS: StackSubEntry += 2; break; + default: llvm_unreachable("Not a VLIW4/VLIW5 GPU"); } return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4 } public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), - TRI(TII->getRegisterInfo()), + TII (0), TRI(0), ST(tm.getSubtarget<AMDGPUSubtarget>()) { const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) - MaxFetchInst = 8; - else - MaxFetchInst = 16; + MaxFetchInst = ST.getTexVTXClauseSize(); } virtual bool runOnMachineFunction(MachineFunction &MF) { + TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo()); + TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo()); + unsigned MaxStack = 0; unsigned CurrentStack = 0; - bool hasPush; + bool HasPush = false; for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { MachineBasicBlock &MBB = *MB; @@ -337,6 +342,7 @@ public: BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), getHWInstrDesc(CF_CALL_FS)); CfCount++; + MaxStack = 1; } std::vector<ClauseFile> FetchClauses, AluClauses; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); @@ -354,7 +360,7 @@ public: case AMDGPU::CF_ALU_PUSH_BEFORE: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); - hasPush = true; + HasPush = true; case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); @@ -475,7 +481,7 @@ public: break; } } - MFI->StackSize = getHWStackSize(MaxStack, hasPush); + MFI->StackSize = getHWStackSize(MaxStack, HasPush); } return false; @@ -488,7 +494,7 @@ public: char R600ControlFlowFinalizer::ID = 0; -} +} // end anonymous namespace llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { |