diff options
-rw-r--r-- | lib/Target/R600/AMDGPU.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUIndirectAddressing.cpp | 345 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.cpp | 40 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.h | 23 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 6 | ||||
-rw-r--r-- | lib/Target/R600/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.cpp | 27 | ||||
-rw-r--r-- | lib/Target/R600/R600InstrInfo.h | 13 | ||||
-rw-r--r-- | lib/Target/R600/R600Packetizer.cpp | 8 | ||||
-rw-r--r-- | lib/Target/R600/R600RegisterInfo.cpp | 7 | ||||
-rw-r--r-- | lib/Target/R600/R600RegisterInfo.td | 32 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.cpp | 18 | ||||
-rw-r--r-- | lib/Target/R600/SIInstrInfo.h | 10 | ||||
-rw-r--r-- | test/CodeGen/R600/indirect-addressing.ll | 39 |
14 files changed, 134 insertions, 436 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index feec1c5..025b28e 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -45,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm); // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); -FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); /// \brief Creates an AMDGPU-specific Target Transformation Info pass. diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp deleted file mode 100644 index f31eed0..0000000 --- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp +++ /dev/null @@ -1,345 +0,0 @@ -//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// Instructions can use indirect addressing to index the register file as if it -/// were memory. This pass lowers RegisterLoad and RegisterStore instructions -/// to either a COPY or a MOV that uses indirect addressing. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "R600MachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -namespace { - -class AMDGPUIndirectAddressingPass : public MachineFunctionPass { - -private: - static char ID; - const AMDGPUInstrInfo *TII; - - bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const; - -public: - AMDGPUIndirectAddressingPass(TargetMachine &tm) : - MachineFunctionPass(ID), - TII(0) - { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "R600 Handle indirect addressing"; } - -}; - -} // End anonymous namespace - -char AMDGPUIndirectAddressingPass::ID = 0; - -FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) { - return new AMDGPUIndirectAddressingPass(tm); -} - -bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - - TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo()); - - int IndirectBegin = TII->getIndirectIndexBegin(MF); - int IndirectEnd = TII->getIndirectIndexEnd(MF); - - if (IndirectBegin == -1) { - // No indirect addressing, we can skip this pass - assert(IndirectEnd == -1); - return false; - } - - // The map keeps track of the indirect address that is represented by - // each virtual register. The key is the register and the value is the - // indirect address it uses. - std::map<unsigned, unsigned> RegisterAddressMap; - - // First pass - Lower all of the RegisterStore instructions and track which - // registers are live. - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - // This map keeps track of the current live indirect registers. - // The key is the address and the value is the register - std::map<unsigned, unsigned> LiveAddressRegisterMap; - MachineBasicBlock &MBB = *BB; - - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next) { - Next = llvm::next(I); - MachineInstr &MI = *I; - - if (!TII->isRegisterStore(MI)) { - continue; - } - - // Lower RegisterStore - - unsigned RegIndex = MI.getOperand(2).getImm(); - unsigned Channel = MI.getOperand(3).getImm(); - unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); - const TargetRegisterClass *IndirectStoreRegClass = - TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg()); - - if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { - // Direct register access. - unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg) - .addOperand(MI.getOperand(0)); - - RegisterAddressMap[DstReg] = Address; - LiveAddressRegisterMap[Address] = DstReg; - } else { - // Indirect register access. - MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I, - MI.getOperand(0).getReg(), // Value - Address, - MI.getOperand(1).getReg()); // Offset - for (int i = IndirectBegin; i <= IndirectEnd; ++i) { - unsigned Addr = TII->calculateIndirectAddress(i, Channel); - unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass); - MOV.addReg(DstReg, RegState::Define | RegState::Implicit); - RegisterAddressMap[DstReg] = Addr; - LiveAddressRegisterMap[Addr] = DstReg; - } - } - MI.eraseFromParent(); - } - - // Update the live-ins of the succesor blocks - for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(), - SuccEnd = MBB.succ_end(); - SuccEnd != Succ; ++Succ) { - std::map<unsigned, unsigned>::const_iterator Key, KeyEnd; - for (Key = LiveAddressRegisterMap.begin(), - KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) { - (*Succ)->addLiveIn(Key->second); - } - } - } - - // Second pass - Lower the RegisterLoad instructions - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - // Key is the address and the value is the register - std::map<unsigned, unsigned> LiveAddressRegisterMap; - MachineBasicBlock &MBB = *BB; - - MachineBasicBlock::livein_iterator LI = MBB.livein_begin(); - while (LI != MBB.livein_end()) { - std::vector<unsigned> PhiRegisters; - - // Make sure this live in is used for indirect addressing - if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) { - ++LI; - continue; - } - - unsigned Address = RegisterAddressMap[*LI]; - LiveAddressRegisterMap[Address] = *LI; - PhiRegisters.push_back(*LI); - - // Check if there are other live in registers which map to the same - // indirect address. - for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI), - LE = MBB.livein_end(); - LJ != LE; ++LJ) { - unsigned Reg = *LJ; - if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) { - continue; - } - - if (RegisterAddressMap[Reg] == Address) { - PhiRegisters.push_back(Reg); - } - } - - if (PhiRegisters.size() == 1) { - // We don't need to insert a Phi instruction, so we can just add the - // registers to the live list for the block. - LiveAddressRegisterMap[Address] = *LI; - MBB.removeLiveIn(*LI); - } else { - // We need to insert a PHI, because we have the same address being - // written in multiple predecessor blocks. - const TargetRegisterClass *PhiDstClass = - TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin())); - unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass); - MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(), - MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::PHI), PhiDstReg); - - for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(), - RE = PhiRegisters.end(); - RI != RE; ++RI) { - unsigned Reg = *RI; - MachineInstr *DefInst = MRI.getVRegDef(Reg); - assert(DefInst); - MachineBasicBlock *RegBlock = DefInst->getParent(); - Phi.addReg(Reg); - Phi.addMBB(RegBlock); - MBB.removeLiveIn(Reg); - } - RegisterAddressMap[PhiDstReg] = Address; - LiveAddressRegisterMap[Address] = PhiDstReg; - } - LI = MBB.livein_begin(); - } - - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next) { - Next = llvm::next(I); - MachineInstr &MI = *I; - - if (!TII->isRegisterLoad(MI)) { - if (MI.getOpcode() == AMDGPU::PHI) { - continue; - } - // Check for indirect register defs - for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands(); - OpIdx < NumOperands; ++OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); - if (MO.isReg() && MO.isDef() && - RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) { - unsigned Reg = MO.getReg(); - unsigned LiveAddress = RegisterAddressMap[Reg]; - // Chain the live-ins - if (LiveAddressRegisterMap.find(LiveAddress) != - LiveAddressRegisterMap.end()) { - MI.addOperand(MachineOperand::CreateReg( - LiveAddressRegisterMap[LiveAddress], - false, // isDef - true, // isImp - true)); // isKill - } - LiveAddressRegisterMap[LiveAddress] = Reg; - } - } - continue; - } - - const TargetRegisterClass *SuperIndirectRegClass = - TII->getSuperIndirectRegClass(); - const TargetRegisterClass *IndirectLoadRegClass = - TII->getIndirectAddrLoadRegClass(); - unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass); - - unsigned RegIndex = MI.getOperand(2).getImm(); - unsigned Channel = MI.getOperand(3).getImm(); - unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel); - - if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) { - // Direct register access - unsigned Reg = LiveAddressRegisterMap[Address]; - unsigned AddrReg = IndirectLoadRegClass->getRegister(Address); - - if (regHasExplicitDef(MRI, Reg)) { - // If the register we are reading from has an explicit def, then that - // means it was written via a direct register access (i.e. COPY - // or other instruction that doesn't use indirect addressing). In - // this case we know where the value has been stored, so we can just - // issue a copy. - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), - MI.getOperand(0).getReg()) - .addReg(Reg); - } else { - // If the register we are reading has an implicit def, then that - // means it was written by an indirect register access (i.e. An - // instruction that uses indirect addressing. - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), - MI.getOperand(0).getReg()) - .addReg(AddrReg) - .addReg(Reg, RegState::Implicit); - } - } else { - // Indirect register access - - // Note on REQ_SEQUENCE instructions: You can't actually use the register - // it defines unless you have an instruction that takes the defined - // register class as an operand. - - MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I), - TII->get(AMDGPU::REG_SEQUENCE), - IndirectReg); - for (int i = IndirectBegin; i <= IndirectEnd; ++i) { - unsigned Addr = TII->calculateIndirectAddress(i, Channel); - if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) { - continue; - } - unsigned Reg = LiveAddressRegisterMap[Addr]; - - // We only need to use REG_SEQUENCE for explicit defs, since the - // register coalescer won't do anything with the implicit defs. - if (!regHasExplicitDef(MRI, Reg)) { - continue; - } - - // Insert a REQ_SEQUENCE instruction to force the register allocator - // to allocate the virtual register to the correct physical register. - Sequence.addReg(LiveAddressRegisterMap[Addr]); - Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr)); - } - MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I, - MI.getOperand(0).getReg(), // Value - Address, - MI.getOperand(1).getReg()); // Offset - - - - Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill); - Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit); - - } - MI.eraseFromParent(); - } - } - return false; -} - -bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI, - unsigned Reg) const { - MachineInstr *DefInstr = MRI.getVRegDef(Reg); - - if (!DefInstr) { - return false; - } - - if (DefInstr->getOpcode() == AMDGPU::PHI) { - bool Explicit = false; - for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(), - E = DefInstr->operands_end(); - I != E; ++I) { - const MachineOperand &MO = *I; - if (!MO.isReg() || MO.isDef()) { - continue; - } - - Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg()); - } - return Explicit; - } - - return DefInstr->getOperand(0).isReg() && - DefInstr->getOperand(0).getReg() == Reg; -} diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index bb7f97f..434c91a 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -118,6 +118,46 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(!"Not Implemented"); } +bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + switch(MI->getOpcode()) { + default: + if (isRegisterLoad(*MI)) { + unsigned RegIndex = MI->getOperand(2).getImm(); + unsigned Channel = MI->getOperand(3).getImm(); + unsigned Address = calculateIndirectAddress(RegIndex, Channel); + unsigned OffsetReg = MI->getOperand(1).getReg(); + if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + buildMovInstr(MBB, MI, MI->getOperand(0).getReg(), + getIndirectAddrRegClass()->getRegister(Address)); + } else { + buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(), + Address, OffsetReg); + } + } else if (isRegisterStore(*MI)) { + unsigned RegIndex = MI->getOperand(2).getImm(); + unsigned Channel = MI->getOperand(3).getImm(); + unsigned Address = calculateIndirectAddress(RegIndex, Channel); + unsigned OffsetReg = MI->getOperand(1).getReg(); + if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { + buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), + MI->getOperand(0).getReg()); + } else { + buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(), + calculateIndirectAddress(RegIndex, Channel), + OffsetReg); + } + } else { + return false; + } + } + + MBB->erase(MI); + return true; +} + + MachineInstr * AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index c83e57d..dc65d4e 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -87,6 +87,8 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, @@ -160,14 +162,9 @@ public: virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const = 0; - /// \returns The register class to be used for storing values to an - /// "Indirect Address" . - virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( - unsigned SourceReg) const = 0; - - /// \returns The register class to be used for loading values from - /// an "Indirect Address" . - virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0; + /// \returns The register class to be used for loading and storing values + /// from an "Indirect Address" . + virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0; /// \brief Build instruction(s) for an indirect register write. /// @@ -185,19 +182,21 @@ public: unsigned ValueReg, unsigned Address, unsigned OffsetReg) const = 0; - /// \returns the register class whose sub registers are the set of all - /// possible registers that can be used for indirect addressing. - virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0; - /// \brief Convert the AMDIL MachineInstr to a supported ISA /// MachineInstr virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const; + /// \brief Build a MOV instruction. + virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg) const = 0; + /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the /// equivalent opcode that writes \p Channels Channels. int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const; + }; namespace AMDGPU { diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index cd7a616..9722e7d 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -139,12 +139,6 @@ AMDGPUPassConfig::addPreISel() { bool AMDGPUPassConfig::addInstSelector() { addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); - - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - // This callbacks this pass uses are not implemented yet on SI. - addPass(createAMDGPUIndirectAddressingPass(*TM)); - } return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 7bdfa7e..9f8f6a8 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen AMDILISelLowering.cpp AMDGPUAsmPrinter.cpp AMDGPUFrameLowering.cpp - AMDGPUIndirectAddressing.cpp AMDGPUISelDAGToDAG.cpp AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 005f642..a11d54a 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -210,6 +210,14 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { } } +bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { + return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; +} + +bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { + return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; +} + bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { if (!isALUInstr(MI->getOpcode())) { return false; @@ -1086,13 +1094,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, return RegIndex; } -const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass( - unsigned SourceReg) const { - return &AMDGPU::R600_TReg32RegClass; -} - -const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const { - return &AMDGPU::TRegMemRegClass; +const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { + return &AMDGPU::R600_TReg32_XRegClass; } MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, @@ -1131,10 +1134,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, return Mov; } -const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const { - return &AMDGPU::IndirectRegRegClass; -} - unsigned R600InstrInfo::getMaxAlusPerClause() const { return 115; } @@ -1272,6 +1271,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, return MovImm; } +MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg) const { + return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); +} + int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { return getOperandIdx(MI.getOpcode(), Op); } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 2e36f05..d7438ef 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -82,6 +82,8 @@ namespace llvm { bool usesTextureCache(const MachineInstr *MI) const; bool mustBeLastInClause(unsigned Opcode) const; + bool usesAddressRegister(MachineInstr *MI) const; + bool definesAddressRegister(MachineInstr *MI) const; bool readsLDSSrcReg(const MachineInstr *MI) const; /// \returns The operand index for the given source number. Legal values @@ -203,10 +205,7 @@ namespace llvm { virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const; - virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( - unsigned SourceReg) const; - - virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const; + virtual const TargetRegisterClass *getIndirectAddrRegClass() const; virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, @@ -218,8 +217,6 @@ namespace llvm { unsigned ValueReg, unsigned Address, unsigned OffsetReg) const; - virtual const TargetRegisterClass *getSuperIndirectRegClass() const; - unsigned getMaxAlusPerClause() const; ///buildDefaultInstruction - This function returns a MachineInstr with @@ -246,6 +243,10 @@ namespace llvm { unsigned DstReg, uint64_t Imm) const; + MachineInstr *buildMovInstr(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg) const; + /// \brief Get the index of Op in the MachineInstr. /// /// \returns -1 if the Instruction does not contain the specified \p Op. diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index 03d8d87..cd9b6ea 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -206,6 +206,14 @@ public: return false; } } + + bool ARDef = TII->definesAddressRegister(MII) || + TII->definesAddressRegister(MIJ); + bool ARUse = TII->usesAddressRegister(MII) || + TII->usesAddressRegister(MIJ); + if (ARDef && ARUse) + return false; + return true; } diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index 4dc63fe..dd8f3ef 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -41,18 +41,13 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::PRED_SEL_OFF); Reserved.set(AMDGPU::PRED_SEL_ZERO); Reserved.set(AMDGPU::PRED_SEL_ONE); + Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { Reserved.set(*I); } - for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(), - E = AMDGPU::TRegMemRegClass.end(); - I != E; ++I) { - Reserved.set(*I); - } - const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF); diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 6fec43c..68bcd20 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -39,8 +39,6 @@ foreach Index = 0-127 in { // Indirect addressing offset registers def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan, Index, Chan>; - def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, - Chan>; } // 128-bit Temporary Registers def T#Index#_XYZW : R600Reg_128 <"T"#Index#"", @@ -210,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, (add (sequence "T%u_XY", 0, 63))>; - -//===----------------------------------------------------------------------===// -// Register classes for indirect addressing -//===----------------------------------------------------------------------===// - -// Super register for all the Indirect Registers. This register class is used -// by the REG_SEQUENCE instruction to specify the registers to use for direct -// reads / writes which may be written / read by an indirect address. -class IndirectSuper<string n, list<Register> subregs> : - RegisterWithSubRegs<n, subregs> { - let Namespace = "AMDGPU"; - let SubRegIndices = - [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, - sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15]; -} - -def IndirectSuperReg : IndirectSuper<"Indirect", - [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X, - TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X, - TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X] ->; - -def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>; - -// This register class defines the registers that are the storage units for -// the "Indirect Addressing" pseudo memory space. -// XXX: Only use the X channel, until we support wider stack widths -def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32, - (add (sequence "TRegMem%u_X", 0, 16)) ->; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index d3ad06b..8b35d89 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -197,6 +197,13 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, return MI; } +MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, + unsigned SrcReg) const { + assert(!"Not Implemented"); +} + bool SIInstrInfo::isMov(unsigned Opcode) const { switch(Opcode) { default: return false; @@ -346,12 +353,7 @@ int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { llvm_unreachable("Unimplemented"); } -const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass( - unsigned SourceReg) const { - llvm_unreachable("Unimplemented"); -} - -const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const { +const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { llvm_unreachable("Unimplemented"); } @@ -370,7 +372,3 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead( unsigned Address, unsigned OffsetReg) const { llvm_unreachable("Unimplemented"); } - -const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const { - llvm_unreachable("Unimplemented"); -} diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 906befd..72bb25e 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -41,6 +41,9 @@ public: bool NewMI=false) const; virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} + MachineInstr *buildMovInstr(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg) const; virtual bool isMov(unsigned Opcode) const; virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; @@ -62,10 +65,7 @@ public: virtual unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const; - virtual const TargetRegisterClass *getIndirectAddrStoreRegClass( - unsigned SourceReg) const; - - virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const; + virtual const TargetRegisterClass *getIndirectAddrRegClass() const; virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, @@ -78,8 +78,6 @@ public: unsigned ValueReg, unsigned Address, unsigned OffsetReg) const; - - virtual const TargetRegisterClass *getSuperIndirectRegClass() const; }; namespace AMDGPU { diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/indirect-addressing.ll index bd72cd9..1ef6c35 100644 --- a/test/CodeGen/R600/indirect-addressing.ll +++ b/test/CodeGen/R600/indirect-addressing.ll @@ -63,3 +63,42 @@ entry: store i32 %0, i32 addrspace(1)* %out ret void } + +; Test direct access of a private array inside a loop. The private array +; loads and stores should be lowered to copies, so there shouldn't be any +; MOVA instructions. + +; CHECK: @direct_loop +; CHECK-NOT: MOVA_INT + +define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %prv_array_const = alloca [2 x i32] + %prv_array = alloca [2 x i32] + %a = load i32 addrspace(1)* %in + %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %b = load i32 addrspace(1)* %b_src_ptr + %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0 + store i32 %a, i32* %a_dst_ptr + %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1 + store i32 %b, i32* %b_dst_ptr + br label %for.body + +for.body: + %inc = phi i32 [0, %entry], [%count, %for.body] + %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0 + %x = load i32* %x_ptr + %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0 + %y = load i32* %y_ptr + %xy = add i32 %x, %y + store i32 %xy, i32* %y_ptr + %count = add i32 %inc, 1 + %done = icmp eq i32 %count, 4095 + br i1 %done, label %for.end, label %for.body + +for.end: + %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0 + %value = load i32* %value_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} |