14 files changed, 134 insertions, 436 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index feec1c5..025b28e 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -45,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
 // Passes common to R600 and SI
 Pass *createAMDGPUStructurizeCFGPass();
 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
 FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
 
 /// \brief Creates an AMDGPU-specific Target Transformation Info pass.
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
deleted file mode 100644
index f31eed0..0000000
--- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Instructions can use indirect addressing to index the register file as if it
-/// were memory.  This pass lowers RegisterLoad and RegisterStore instructions
-/// to either a COPY or a MOV that uses indirect addressing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
-
-private:
-  static char ID;
-  const AMDGPUInstrInfo *TII;
-
-  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
-
-public:
-  AMDGPUIndirectAddressingPass(TargetMachine &tm) :
-    MachineFunctionPass(ID),
-    TII(0)
-    { }
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  const char *getPassName() const { return "R600 Handle indirect addressing"; }
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUIndirectAddressingPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
-  return new AMDGPUIndirectAddressingPass(tm);
-}
-
-bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo());
-
-  int IndirectBegin = TII->getIndirectIndexBegin(MF);
-  int IndirectEnd = TII->getIndirectIndexEnd(MF);
-
-  if (IndirectBegin == -1) {
-    // No indirect addressing, we can skip this pass
-    assert(IndirectEnd == -1);
-    return false;
-  }
-
-  // The map keeps track of the indirect address that is represented by
-  // each virtual register. The key is the register and the value is the
-  // indirect address it uses.
-  std::map<unsigned, unsigned> RegisterAddressMap;
-
-  // First pass - Lower all of the RegisterStore instructions and track which
-  // registers are live.
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                      BB != BB_E; ++BB) {
-    // This map keeps track of the current live indirect registers.
-    // The key is the address and the value is the register
-    std::map<unsigned, unsigned> LiveAddressRegisterMap;
-    MachineBasicBlock &MBB = *BB;
-
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-                               I != MBB.end(); I = Next) {
-      Next = llvm::next(I);
-      MachineInstr &MI = *I;
-
-      if (!TII->isRegisterStore(MI)) {
-        continue;
-      }
-
-      // Lower RegisterStore
-
-      unsigned RegIndex = MI.getOperand(2).getImm();
-      unsigned Channel = MI.getOperand(3).getImm();
-      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-      const TargetRegisterClass *IndirectStoreRegClass =
-                   TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
-
-      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
-        // Direct register access.
-        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-
-        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
-                .addOperand(MI.getOperand(0));
-
-        RegisterAddressMap[DstReg] = Address;
-        LiveAddressRegisterMap[Address] = DstReg;
-      } else {
-        // Indirect register access.
-        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
-                                           MI.getOperand(0).getReg(), // Value
-                                           Address,
-                                           MI.getOperand(1).getReg()); // Offset
-        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
-          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
-          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
-          RegisterAddressMap[DstReg] = Addr;
-          LiveAddressRegisterMap[Addr] = DstReg;
-        }
-      }
-      MI.eraseFromParent();
-    }
-
-    // Update the live-ins of the succesor blocks
-    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
-                                          SuccEnd = MBB.succ_end();
-                                          SuccEnd != Succ; ++Succ) {
-      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
-      for (Key = LiveAddressRegisterMap.begin(),
-           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
-        (*Succ)->addLiveIn(Key->second);
-      }
-    }
-  }
-
-  // Second pass - Lower the RegisterLoad instructions
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                      BB != BB_E; ++BB) {
-    // Key is the address and the value is the register
-    std::map<unsigned, unsigned> LiveAddressRegisterMap;
-    MachineBasicBlock &MBB = *BB;
-
-    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
-    while (LI != MBB.livein_end()) {
-      std::vector<unsigned> PhiRegisters;
-
-      // Make sure this live in is used for indirect addressing
-      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
-        ++LI;
-        continue;
-      }
-
-      unsigned Address = RegisterAddressMap[*LI];
-      LiveAddressRegisterMap[Address] = *LI;
-      PhiRegisters.push_back(*LI);
-
-      // Check if there are other live in registers which map to the same
-      // indirect address.
-      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
-                                              LE = MBB.livein_end();
-                                              LJ != LE; ++LJ) {
-        unsigned Reg = *LJ;
-        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
-          continue;
-        }
-
-        if (RegisterAddressMap[Reg] == Address) {
-          PhiRegisters.push_back(Reg);
-        }
-      }
-
-      if (PhiRegisters.size() == 1) {
-        // We don't need to insert a Phi instruction, so we can just add the
-        // registers to the live list for the block.
-        LiveAddressRegisterMap[Address] = *LI;
-        MBB.removeLiveIn(*LI);
-      } else {
-        // We need to insert a PHI, because we have the same address being
-        // written in multiple predecessor blocks.
-        const TargetRegisterClass *PhiDstClass =
-                   TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
-        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
-        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
-                                          MBB.findDebugLoc(MBB.begin()),
-                                          TII->get(AMDGPU::PHI), PhiDstReg);
-
-        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
-                                                   RE = PhiRegisters.end();
-                                                   RI != RE; ++RI) {
-          unsigned Reg = *RI;
-          MachineInstr *DefInst = MRI.getVRegDef(Reg);
-          assert(DefInst);
-          MachineBasicBlock *RegBlock = DefInst->getParent();
-          Phi.addReg(Reg);
-          Phi.addMBB(RegBlock);
-          MBB.removeLiveIn(Reg);
-        }
-        RegisterAddressMap[PhiDstReg] = Address;
-        LiveAddressRegisterMap[Address] = PhiDstReg;
-      }
-      LI = MBB.livein_begin();
-    }
-
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-                               I != MBB.end(); I = Next) {
-      Next = llvm::next(I);
-      MachineInstr &MI = *I;
-
-      if (!TII->isRegisterLoad(MI)) {
-        if (MI.getOpcode() == AMDGPU::PHI) {
-          continue;
-        }
-        // Check for indirect register defs
-        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
-                                 OpIdx < NumOperands; ++OpIdx) {
-          MachineOperand &MO = MI.getOperand(OpIdx);
-          if (MO.isReg() && MO.isDef() &&
-              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
-            unsigned Reg = MO.getReg();
-            unsigned LiveAddress = RegisterAddressMap[Reg];
-            // Chain the live-ins
-            if (LiveAddressRegisterMap.find(LiveAddress) !=
-                LiveAddressRegisterMap.end()) {
-              MI.addOperand(MachineOperand::CreateReg(
-                                  LiveAddressRegisterMap[LiveAddress],
-                                  false, // isDef
-                                  true,  // isImp
-                                  true));  // isKill
-            }
-            LiveAddressRegisterMap[LiveAddress] = Reg;
-          }
-        }
-        continue;
-      }
-
-      const TargetRegisterClass *SuperIndirectRegClass =
-                                                TII->getSuperIndirectRegClass();
-      const TargetRegisterClass *IndirectLoadRegClass =
-                                             TII->getIndirectAddrLoadRegClass();
-      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
-
-      unsigned RegIndex = MI.getOperand(2).getImm();
-      unsigned Channel = MI.getOperand(3).getImm();
-      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-
-      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
-        // Direct register access
-        unsigned Reg = LiveAddressRegisterMap[Address];
-        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
-
-        if (regHasExplicitDef(MRI, Reg)) {
-          // If the register we are reading from has an explicit def, then that
-          // means it was written via a direct register access (i.e. COPY
-          // or other instruction that doesn't use indirect addressing).  In
-          // this case we know where the value has been stored, so we can just
-          // issue a copy.
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
-                  MI.getOperand(0).getReg())
-                  .addReg(Reg);
-        } else {
-          // If the register we are reading has an implicit def, then that
-          // means it was written by an indirect register access (i.e. An
-          // instruction that uses indirect addressing. 
-          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
-                   MI.getOperand(0).getReg())
-                   .addReg(AddrReg)
-                   .addReg(Reg, RegState::Implicit);
-        }
-      } else {
-        // Indirect register access
-
-        // Note on REQ_SEQUENCE instructions: You can't actually use the register
-        // it defines unless  you have an instruction that takes the defined
-        // register class as an operand.
-
-        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
-                                               TII->get(AMDGPU::REG_SEQUENCE),
-                                               IndirectReg);
-        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
-          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
-          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
-            continue;
-          }
-          unsigned Reg = LiveAddressRegisterMap[Addr];
-
-          // We only need to use REG_SEQUENCE for explicit defs, since the
-          // register coalescer won't do anything with the implicit defs.
-          if (!regHasExplicitDef(MRI, Reg)) {
-            continue;
-          }
-
-          // Insert a REQ_SEQUENCE instruction to force the register allocator
-          // to allocate the virtual register to the correct physical register.
-          Sequence.addReg(LiveAddressRegisterMap[Addr]);
-          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
-        }
-        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
-                                           MI.getOperand(0).getReg(), // Value
-                                           Address,
-                                           MI.getOperand(1).getReg()); // Offset
-
-
-
-        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
-        Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
-
-      }
-      MI.eraseFromParent();
-    }
-  }
-  return false;
-}
-
-bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
-                                                  unsigned Reg) const {
-  MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-
-  if (!DefInstr) {
-    return false;
-  }
-
-  if (DefInstr->getOpcode() == AMDGPU::PHI) {
-    bool Explicit = false;
-    for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
-                                          E = DefInstr->operands_end();
-                                          I != E; ++I) {
-      const MachineOperand &MO = *I;
-      if (!MO.isReg() || MO.isDef()) {
-        continue;
-      }
-
-      Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
-    }
-    return Explicit;
-  }
-
-  return DefInstr->getOperand(0).isReg() &&
-         DefInstr->getOperand(0).getReg() == Reg;
-}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index bb7f97f..434c91a 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -118,6 +118,46 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   assert(!"Not Implemented");
 }
 
+bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  switch(MI->getOpcode()) {
+  default:
+    if (isRegisterLoad(*MI)) {
+      unsigned RegIndex = MI->getOperand(2).getImm();
+      unsigned Channel = MI->getOperand(3).getImm();
+      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+      unsigned OffsetReg = MI->getOperand(1).getReg();
+      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+        buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
+                      getIndirectAddrRegClass()->getRegister(Address));
+      } else {
+        buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
+                          Address, OffsetReg);
+      }
+    } else if (isRegisterStore(*MI)) {
+      unsigned RegIndex = MI->getOperand(2).getImm();
+      unsigned Channel = MI->getOperand(3).getImm();
+      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+      unsigned OffsetReg = MI->getOperand(1).getReg();
+      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
+                      MI->getOperand(0).getReg());
+      } else {
+        buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
+                         calculateIndirectAddress(RegIndex, Channel),
+                         OffsetReg);
+      }
+    } else {
+      return false;
+    }
+  }
+
+  MBB->erase(MI);
+  return true;
+}
+
+
 MachineInstr *
 AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr *MI,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index c83e57d..dc65d4e 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -87,6 +87,8 @@ public:
                             unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
 
 protected:
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
@@ -160,14 +162,9 @@ public:
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const = 0;
 
-  /// \returns The register class to be used for storing values to an
-  /// "Indirect Address" .
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                  unsigned SourceReg) const = 0;
-
-  /// \returns The register class to be used for loading values from
-  /// an "Indirect Address" .
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+  /// \returns The register class to be used for loading and storing values
+  /// from an "Indirect Address" .
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
 
   /// \brief Build instruction(s) for an indirect register write.
   ///
@@ -185,19 +182,21 @@ public:
                                     unsigned ValueReg, unsigned Address,
                                     unsigned OffsetReg) const = 0;
 
-  /// \returns the register class whose sub registers are the set of all
-  /// possible registers that can be used for indirect addressing.
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
-
 
   /// \brief Convert the AMDIL MachineInstr to a supported ISA
   /// MachineInstr
   virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
     DebugLoc DL) const;
 
+  /// \brief Build a MOV instruction.
+  virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator I,
+                                      unsigned DstReg, unsigned SrcReg) const = 0;
+
   /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
   /// equivalent opcode that writes \p Channels Channels.
   int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+
 };
 
 namespace AMDGPU {
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index cd7a616..9722e7d 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -139,12 +139,6 @@ AMDGPUPassConfig::addPreISel() {
 
 bool AMDGPUPassConfig::addInstSelector() {
   addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    // This callbacks this pass uses are not implemented yet on SI.
-    addPass(createAMDGPUIndirectAddressingPass(*TM));
-  }
   return false;
 }
 
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 7bdfa7e..9f8f6a8 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
   AMDILISelLowering.cpp
   AMDGPUAsmPrinter.cpp
   AMDGPUFrameLowering.cpp
-  AMDGPUIndirectAddressing.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUMachineFunction.cpp
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 005f642..a11d54a 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -210,6 +210,14 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
   }
 }
 
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+  return  MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
   if (!isALUInstr(MI->getOpcode())) {
     return false;
@@ -1086,13 +1094,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
   return RegIndex;
 }
 
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
-                                                     unsigned SourceReg) const {
-  return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
-  return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+  return &AMDGPU::R600_TReg32_XRegClass;
 }
 
 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -1131,10 +1134,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   return Mov;
 }
 
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
-  return &AMDGPU::IndirectRegRegClass;
-}
-
 unsigned R600InstrInfo::getMaxAlusPerClause() const {
   return 115;
 }
@@ -1272,6 +1271,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
   return MovImm;
 }
 
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned DstReg, unsigned SrcReg) const {
+  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
   return getOperandIdx(MI.getOpcode(), Op);
 }
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index 2e36f05..d7438ef 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -82,6 +82,8 @@ namespace llvm {
   bool usesTextureCache(const MachineInstr *MI) const;
 
   bool mustBeLastInClause(unsigned Opcode) const;
+  bool usesAddressRegister(MachineInstr *MI) const;
+  bool definesAddressRegister(MachineInstr *MI) const;
   bool readsLDSSrcReg(const MachineInstr *MI) const;
 
   /// \returns The operand index for the given source number.  Legal values
@@ -203,10 +205,7 @@ namespace llvm {
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const;
 
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                      unsigned SourceReg) const;
-
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
 
   virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
@@ -218,8 +217,6 @@ namespace llvm {
                                   unsigned ValueReg, unsigned Address,
                                   unsigned OffsetReg) const;
 
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
-
   unsigned getMaxAlusPerClause() const;
 
   ///buildDefaultInstruction - This function returns a MachineInstr with
@@ -246,6 +243,10 @@ namespace llvm {
                                   unsigned DstReg,
                                   uint64_t Imm) const;
 
+  MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DstReg, unsigned SrcReg) const;
+
   /// \brief Get the index of Op in the MachineInstr.
   ///
   /// \returns -1 if the Instruction does not contain the specified \p Op.
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 03d8d87..cd9b6ea 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -206,6 +206,14 @@ public:
         return false;
       }
     }
+
+    bool ARDef = TII->definesAddressRegister(MII) ||
+                 TII->definesAddressRegister(MIJ);
+    bool ARUse = TII->usesAddressRegister(MII) ||
+                 TII->usesAddressRegister(MIJ);
+    if (ARDef && ARUse)
+      return false;
+
     return true;
   }
 
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index 4dc63fe..dd8f3ef 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -41,18 +41,13 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::PRED_SEL_OFF);
   Reserved.set(AMDGPU::PRED_SEL_ZERO);
   Reserved.set(AMDGPU::PRED_SEL_ONE);
+  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
 
   for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
                         E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
     Reserved.set(*I);
   }
 
-  for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
-                                     E = AMDGPU::TRegMemRegClass.end();
-                                     I !=  E; ++I) {
-    Reserved.set(*I);
-  }
-
   const R600InstrInfo *RII =
     static_cast<const R600InstrInfo*>(TM.getInstrInfo());
   std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 6fec43c..68bcd20 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -39,8 +39,6 @@ foreach Index = 0-127 in {
     // Indirect addressing offset registers
     def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
                                               Index, Chan>;
-    def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
-                                                Chan>;
   }
   // 128-bit Temporary Registers
   def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
@@ -210,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
 
 def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
                                 (add (sequence "T%u_XY", 0, 63))>;
-
-//===----------------------------------------------------------------------===//
-// Register classes for indirect addressing
-//===----------------------------------------------------------------------===//
-
-// Super register for all the Indirect Registers.  This register class is used
-// by the REG_SEQUENCE instruction to specify the registers to use for direct
-// reads / writes which may be written / read by an indirect address.
-class IndirectSuper<string n, list<Register> subregs> :
-    RegisterWithSubRegs<n, subregs> {
-  let Namespace = "AMDGPU";
-  let SubRegIndices =
- [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
-  sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
-}
-
-def IndirectSuperReg : IndirectSuper<"Indirect",
-  [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
-   TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
-   TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
->;
-
-def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
-
-// This register class defines the registers that are the storage units for
-// the "Indirect Addressing" pseudo memory space.
-// XXX: Only use the X channel, until we support wider stack widths
-def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add (sequence "TRegMem%u_X", 0, 16))
->;
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index d3ad06b..8b35d89 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -197,6 +197,13 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
   return MI;
 }
 
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+                                         MachineBasicBlock::iterator I,
+                                         unsigned DstReg,
+                                         unsigned SrcReg) const {
+  assert(!"Not Implemented");
+}
+
 bool SIInstrInfo::isMov(unsigned Opcode) const {
   switch(Opcode) {
   default: return false;
@@ -346,12 +353,7 @@ int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
   llvm_unreachable("Unimplemented");
 }
 
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
-                                                     unsigned SourceReg) const {
-  llvm_unreachable("Unimplemented");
-}
-
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   llvm_unreachable("Unimplemented");
 }
 
@@ -370,7 +372,3 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                    unsigned Address, unsigned OffsetReg) const {
   llvm_unreachable("Unimplemented");
 }
-
-const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
-  llvm_unreachable("Unimplemented");
-}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 906befd..72bb25e 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -41,6 +41,9 @@ public:
                                            bool NewMI=false) const;
 
   virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+  MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DstReg, unsigned SrcReg) const;
   virtual bool isMov(unsigned Opcode) const;
 
   virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
@@ -62,10 +65,7 @@ public:
   virtual unsigned calculateIndirectAddress(unsigned RegIndex,
                                             unsigned Channel) const;
 
-  virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
-                                                      unsigned SourceReg) const;
-
-  virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+  virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
 
   virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
                                                  MachineBasicBlock::iterator I,
@@ -78,8 +78,6 @@ public:
                                                 unsigned ValueReg,
                                                 unsigned Address,
                                                 unsigned OffsetReg) const;
-
-  virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
   };
 
 namespace AMDGPU {
diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/indirect-addressing.ll
index bd72cd9..1ef6c35 100644
--- a/test/CodeGen/R600/indirect-addressing.ll
+++ b/test/CodeGen/R600/indirect-addressing.ll
@@ -63,3 +63,42 @@ entry:
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
+
+; Test direct access of a private array inside a loop.  The private array
+; loads and stores should be lowered to copies, so there shouldn't be any
+; MOVA instructions.
+
+; CHECK: @direct_loop
+; CHECK-NOT: MOVA_INT
+
+define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %prv_array_const = alloca [2 x i32]
+  %prv_array = alloca [2 x i32]
+  %a = load i32 addrspace(1)* %in
+  %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %b = load i32 addrspace(1)* %b_src_ptr
+  %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  store i32 %a, i32* %a_dst_ptr
+  %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
+  store i32 %b, i32* %b_dst_ptr
+  br label %for.body
+
+for.body:
+  %inc = phi i32 [0, %entry], [%count, %for.body]
+  %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  %x = load i32* %x_ptr
+  %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %y = load i32* %y_ptr
+  %xy = add i32 %x, %y
+  store i32 %xy, i32* %y_ptr
+  %count = add i32 %inc, 1
+  %done = icmp eq i32 %count, 4095
+  br i1 %done, label %for.end, label %for.body
+
+for.end:
+  %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %value = load i32* %value_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}