//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// /// This pass loads scratch pointer and scratch offset into a register or a /// frame index which can be used anywhere in the program. These values will /// be used for spilling VGPRs. /// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" using namespace llvm; namespace { class SIPrepareScratchRegs : public MachineFunctionPass { private: static char ID; public: SIPrepareScratchRegs() : MachineFunctionPass(ID) { } bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { return "SI prepare scratch registers"; } }; } // End anonymous namespace char SIPrepareScratchRegs::ID = 0; FunctionPass *llvm::createSIPrepareScratchRegs() { return new SIPrepareScratchRegs(); } bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { SIMachineFunctionInfo *MFI = MF.getInfo(); const SIInstrInfo *TII = static_cast(MF.getSubtarget().getInstrInfo()); const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); MachineBasicBlock *Entry = MF.begin(); MachineBasicBlock::iterator I = Entry->begin(); DebugLoc DL = I->getDebugLoc(); // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to // run this pass. if (!MFI->hasSpilledVGPRs()) return false; unsigned ScratchPtrPreloadReg = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); unsigned ScratchOffsetPreloadReg = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); if (!Entry->isLiveIn(ScratchPtrPreloadReg)) Entry->addLiveIn(ScratchPtrPreloadReg); if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) Entry->addLiveIn(ScratchOffsetPreloadReg); // Load the scratch offset. unsigned ScratchOffsetReg = TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); int ScratchOffsetFI = -1; if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use MRI.setPhysRegUsed(ScratchOffsetReg); BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg) .addReg(ScratchOffsetPreloadReg); } else { // No SGPR is available, we must spill. ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4); BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE)) .addReg(ScratchOffsetPreloadReg) .addFrameIndex(ScratchOffsetFI) .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } // Now that we have the scratch pointer and offset values, we need to // add them to all the SI_SPILL_V* instructions. RegScavenger RS; unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); RS.addScavengingFrameIndex(ScratchRsrcFI); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; // Add the scratch offset reg as a live-in so that the register scavenger // doesn't re-use it. if (!MBB.isLiveIn(ScratchOffsetReg) && ScratchOffsetReg != AMDGPU::NoRegister) MBB.addLiveIn(ScratchOffsetReg); RS.enterBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; RS.forward(I); DebugLoc DL = MI.getDebugLoc(); switch(MI.getOpcode()) { default: break; case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: case AMDGPU::SI_SPILL_V32_SAVE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: // Scratch resource unsigned ScratchRsrcReg = RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | 0xffffffff; // Size unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) .addExternalSymbol("SCRATCH_RSRC_DWORD0") .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) .addExternalSymbol("SCRATCH_RSRC_DWORD1") .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) .addImm(Rsrc & 0xffffffff) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) .addImm(Rsrc >> 32) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); // Scratch Offset if (ScratchOffsetReg == AMDGPU::NoRegister) { ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), ScratchOffsetReg) .addFrameIndex(ScratchOffsetFI) .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else if (!MBB.isLiveIn(ScratchOffsetReg)) { MBB.addLiveIn(ScratchOffsetReg); } if (ScratchRsrcReg == AMDGPU::NoRegister || ScratchOffsetReg == AMDGPU::NoRegister) { LLVMContext &Ctx = MF.getFunction()->getContext(); Ctx.emitError("ran out of SGPRs for spilling VGPRs"); ScratchRsrcReg = AMDGPU::SGPR0; ScratchOffsetReg = AMDGPU::SGPR0; } MI.getOperand(2).setReg(ScratchRsrcReg); MI.getOperand(2).setIsKill(true); MI.getOperand(2).setIsUndef(false); MI.getOperand(3).setReg(ScratchOffsetReg); MI.getOperand(3).setIsUndef(false); MI.getOperand(3).setIsKill(false); MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); break; } } } return true; }