diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPU.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 5 | ||||
-rw-r--r-- | lib/Target/R600/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600OptimizeVectorRegisters.cpp | 363 |
4 files changed, 0 insertions, 370 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index d3ef426..f9d70c9 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,7 +23,6 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600TextureIntrinsicsReplacer(); FunctionPass* createR600KernelParametersPass(const DataLayout *TD); -FunctionPass *createR600VectorRegMerger(TargetMachine &tm); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); FunctionPass *createR600Packetizer(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index c52af56..88dc583 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -130,11 +130,6 @@ bool AMDGPUPassConfig::addInstSelector() { bool AMDGPUPassConfig::addPreRegAlloc() { addPass(createAMDGPUConvertToISAPass(*TM)); - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { - addPass(createR600VectorRegMerger(*TM)); - } return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 558d001..c5ce9dc 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_target(R600CodeGen R600ISelLowering.cpp R600MachineFunctionInfo.cpp R600MachineScheduler.cpp - R600OptimizeVectorRegisters.cpp R600Packetizer.cpp R600RegisterInfo.cpp R600TextureIntrinsicsReplacer.cpp diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp deleted file mode 100644 index 6811dbc..0000000 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ /dev/null @@ -1,363 +0,0 @@ -//===--------------------- R600MergeVectorRegisters.cpp -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass merges inputs of swizzeable instructions into vector sharing -/// common data and/or have enough undef subreg using swizzle abilities. -/// -/// For instance let's consider the following pseudo code : -/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 -/// ... -/// vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3 -/// -/// is turned into : -/// vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 -/// ... -/// vreg7<def> = INSERT_SUBREG vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3 -/// -/// This allow regalloc to reduce register pressure for vector registers and -/// to reduce MOV count. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "vec-merger" -#include "llvm/Support/Debug.h" -#include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -static bool -isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { - for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg), - E = MRI.def_end(); It != E; ++It) { - return (*It).isImplicitDef(); - } - llvm_unreachable("Reg without a def"); - return false; -} - -class RegSeqInfo { -public: - MachineInstr *Instr; - DenseMap<unsigned, unsigned> RegToChan; - std::vector<unsigned> UndefReg; - RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { - assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); - for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { - MachineOperand &MO = Instr->getOperand(i); - unsigned Chan = Instr->getOperand(i + 1).getImm(); - if (isImplicitlyDef(MRI, MO.getReg())) - UndefReg.push_back(Chan); - else - RegToChan[MO.getReg()] = Chan; - } - } - RegSeqInfo() {} - - bool operator==(const RegSeqInfo &RSI) const { - return RSI.Instr == Instr; - } -}; - -class R600VectorRegMerger : public MachineFunctionPass { -private: - MachineRegisterInfo *MRI; - const R600InstrInfo *TII; - bool canSwizzle(const MachineInstr &) const; - bool areAllUsesSwizzeable(unsigned Reg) const; - void SwizzleInput(MachineInstr &, - const std::vector<std::pair<unsigned, unsigned> > &) const; - bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, - std::vector<std::pair<unsigned, unsigned> > &Remap) const; - bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, - std::vector<std::pair<unsigned, unsigned> > &RemapChan); - bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, - std::vector<std::pair<unsigned, unsigned> > &RemapChan); - MachineInstr *RebuildVector(RegSeqInfo *MI, - const RegSeqInfo *BaseVec, - const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; - void RemoveMI(MachineInstr *); - void trackRSI(const RegSeqInfo &RSI); - - typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; - DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; - InstructionSetMap PreviousRegSeqByReg; - InstructionSetMap PreviousRegSeqByUndefCount; -public: - static char ID; - R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - const char *getPassName() const { - return "R600 Vector Registers Merge Pass"; - } - - bool runOnMachineFunction(MachineFunction &Fn); -}; - -char R600VectorRegMerger::ID = 0; - -bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) - const { - if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) - return true; - switch (MI.getOpcode()) { - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportSwz: - return true; - default: - return false; - } -} - -bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, - RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) - const { - unsigned CurrentUndexIdx = 0; - for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(), - E = ToMerge->RegToChan.end(); It != E; ++It) { - DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = - Untouched->RegToChan.find((*It).first); - if (PosInUntouched != Untouched->RegToChan.end()) { - Remap.push_back(std::pair<unsigned, unsigned> - ((*It).second, (*PosInUntouched).second)); - continue; - } - if (CurrentUndexIdx >= Untouched->UndefReg.size()) - return false; - Remap.push_back(std::pair<unsigned, unsigned> - ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); - } - - return true; -} - -MachineInstr *R600VectorRegMerger::RebuildVector( - RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, - const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { - unsigned Reg = RSI->Instr->getOperand(0).getReg(); - MachineBasicBlock::iterator Pos = RSI->Instr; - MachineBasicBlock &MBB = *Pos->getParent(); - DebugLoc DL = Pos->getDebugLoc(); - - unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); - DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; - std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; - for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), - E = RSI->RegToChan.end(); It != E; ++It) { - if (BaseRSI->RegToChan.find((*It).first) != BaseRSI->RegToChan.end()) { - UpdatedRegToChan[(*It).first] = (*It).second; - continue; - } - unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); - unsigned SubReg = (*It).first; - unsigned Swizzle = (*It).second; - unsigned Chan; - for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { - if (RemapChan[j].first == Swizzle) { - Chan = RemapChan[j].second; - break; - } - } - MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), - DstReg) - .addReg(SrcVec) - .addReg(SubReg) - .addImm(Chan); - UpdatedRegToChan[SubReg] = Chan; - for (std::vector<unsigned>::iterator RemoveIt = UpdatedUndef.begin(), - RemoveE = UpdatedUndef.end(); RemoveIt != RemoveE; ++ RemoveIt) { - if (*RemoveIt == Chan) - UpdatedUndef.erase(RemoveIt); - } - DEBUG(dbgs() << " ->"; Tmp->dump();); - SrcVec = DstReg; - } - Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) - .addReg(SrcVec); - DEBUG(dbgs() << " ->"; Pos->dump();); - - DEBUG(dbgs() << " Updating Swizzle:\n"); - for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), - E = MRI->use_end(); It != E; ++It) { - DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); - SwizzleInput(*It, RemapChan); - DEBUG((*It).dump()); - } - RSI->Instr->eraseFromParent(); - - // Update RSI - RSI->Instr = Pos; - RSI->RegToChan = UpdatedRegToChan; - RSI->UndefReg = UpdatedUndef; - - return Pos; -} - -void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { - for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), - E = PreviousRegSeqByReg.end(); It != E; ++It) { - std::vector<MachineInstr *> &MIs = (*It).second; - MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); - } - for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), - E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { - std::vector<MachineInstr *> &MIs = (*It).second; - MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); - } -} - -void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, - const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { - unsigned Offset; - if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) - Offset = 2; - else - Offset = 3; - for (unsigned i = 0; i < 4; i++) { - unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; - for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { - if (RemapChan[j].first == Swizzle) { - MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); - break; - } - } - } -} - -bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), - E = MRI->use_end(); It != E; ++It) { - if (!canSwizzle(*It)) - return false; - } - return true; -} - -bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, - RegSeqInfo &CompatibleRSI, - std::vector<std::pair<unsigned, unsigned> > &RemapChan) { - for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), - MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { - if (!MOp->isReg()) - continue; - if (PreviousRegSeqByReg[MOp->getReg()].empty()) - continue; - std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; - for (unsigned i = 0, e = MIs.size(); i < e; i++) { - CompatibleRSI = PreviousRegSeq[MIs[i]]; - if (RSI == CompatibleRSI) - continue; - if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) - return true; - } - } - return false; -} - -bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, - RegSeqInfo &CompatibleRSI, - std::vector<std::pair<unsigned, unsigned> > &RemapChan) { - unsigned NeededUndefs = 4 - RSI.UndefReg.size(); - if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) - return false; - std::vector<MachineInstr *> &MIs = - PreviousRegSeqByUndefCount[NeededUndefs]; - CompatibleRSI = PreviousRegSeq[MIs.back()]; - tryMergeVector(&CompatibleRSI, &RSI, RemapChan); - return true; -} - -void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { - for (DenseMap<unsigned, unsigned>::const_iterator - It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { - PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); - } - PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); - PreviousRegSeq[RSI.Instr] = RSI; -} - -bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { - MRI = &(Fn.getRegInfo()); - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock *MB = MBB; - PreviousRegSeq.clear(); - PreviousRegSeqByReg.clear(); - PreviousRegSeqByUndefCount.clear(); - - for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); - MII != MIIE; ++MII) { - MachineInstr *MI = MII; - if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) - continue; - - RegSeqInfo RSI(*MRI, MI); - - // All uses of MI are swizzeable ? - unsigned Reg = MI->getOperand(0).getReg(); - if (!areAllUsesSwizzeable(Reg)) - continue; - - DEBUG (dbgs() << "Trying to optimize "; - MI->dump(); - ); - - RegSeqInfo CandidateRSI; - std::vector<std::pair<unsigned, unsigned> > RemapChan; - DEBUG(dbgs() << "Using common slots...\n";); - if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { - // Remove CandidateRSI mapping - RemoveMI(CandidateRSI.Instr); - MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); - trackRSI(RSI); - continue; - } - DEBUG(dbgs() << "Using free slots...\n";); - RemapChan.clear(); - if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { - RemoveMI(CandidateRSI.Instr); - MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); - trackRSI(RSI); - continue; - } - //Failed to merge - trackRSI(RSI); - } - } - return false; -} - -} - -llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { - return new R600VectorRegMerger(tm); -} - |