diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-06 23:08:28 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-06 23:08:28 +0000 |
commit | 3492eefa4b2509c87598678a6977074a3f6a50e6 (patch) | |
tree | 6e479880fecb17ad357750d49759395f7cf2dd0a /lib/Target/R600 | |
parent | 3406d882c02a6cd1e16f4636351c23dcb68d785f (diff) | |
download | external_llvm-3492eefa4b2509c87598678a6977074a3f6a50e6.zip external_llvm-3492eefa4b2509c87598678a6977074a3f6a50e6.tar.gz external_llvm-3492eefa4b2509c87598678a6977074a3f6a50e6.tar.bz2 |
R600/SI: Use VSrc_* register classes as the default classes for types
Since the VSrc_* register classes contain both VGPRs and SGPRs, copies
that used to be emitted by isel like this:
SGPR = COPY VGPR
Will now be emitted like this:
VSrc = COPY VGPR
This patch also adds a pass that tries to identify and fix situations where
a VGPR to SGPR copy may occur. Hopefully, these changes will make it
impossible for the compiler to generate illegal VGPR to SGPR copies.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187831 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600')
-rw-r--r-- | lib/Target/R600/AMDGPU.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 24 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIFixSGPRCopies.cpp | 152 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 28 |
5 files changed, 163 insertions, 44 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 51d0d3c..6b374cb 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -36,6 +36,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); +FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 38a5f24..f222901 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -302,7 +302,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { - RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); } else { @@ -816,28 +816,6 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { E = CurDAG->allnodes_end(); I != E; ++I) { SDNode *Node = I; - switch (Node->getOpcode()) { - // Fix the register class in copy to CopyToReg nodes - ISel will always - // use SReg classes for 64-bit copies, but this is not always what we want. 
- case ISD::CopyToReg: { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); - SDValue Val = Node->getOperand(2); - const TargetRegisterClass *RC = RegInfo->getRegClass(Reg); - if (RC != &AMDGPU::SReg_64RegClass) { - continue; - } - - if (!Val.getNode()->isMachineOpcode() || - Val.getNode()->getMachineOpcode() == AMDGPU::IMPLICIT_DEF) { - continue; - } - - const MCInstrDesc Desc = TM.getInstrInfo()->get(Val.getNode()->getMachineOpcode()); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - RegInfo->setRegClass(Reg, TRI->getRegClass(Desc.OpInfo[0].RegClass)); - continue; - } - } MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); if (!MachineNode) diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 1a30496..5ebc5f2 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -146,6 +146,8 @@ bool AMDGPUPassConfig::addPreRegAlloc() { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createR600VectorRegMerger(*TM)); + } else { + addPass(createSIFixSGPRCopiesPass(*TM)); } return false; } diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp new file mode 100644 index 0000000..435172a --- /dev/null +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -0,0 +1,152 @@ +//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// Copies from VGPR to SGPR registers are illegal and the register coalescer +/// will sometimes generate these illegal copies in situations like this: +/// +/// Register Class <vsrc> is the union of <vgpr> and <sgpr> +/// +/// BB0: +/// %vreg0 <sgpr> = SCALAR_INST +/// %vreg1 <vsrc> = COPY %vreg0 <sgpr> +/// ... +/// BRANCH %cond BB1, BB2 +/// BB1: +/// %vreg2 <vgpr> = VECTOR_INST +/// %vreg3 <vsrc> = COPY %vreg2 <vgpr> +/// BB2: +/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1> +/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc> +/// +/// +/// The coalescer will begin at BB0 and eliminate its copy, then the resulting +/// code will look like this: +/// +/// BB0: +/// %vreg0 <sgpr> = SCALAR_INST +/// ... +/// BRANCH %cond BB1, BB2 +/// BB1: +/// %vreg2 <vgpr> = VECTOR_INST +/// %vreg3 <vsrc> = COPY %vreg2 <vgpr> +/// BB2: +/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1> +/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> +/// +/// Now that the result of the PHI instruction is an SGPR, the register +/// allocator is now forced to constrain the register class of %vreg3 to +/// <sgpr> so we end up with final code like this: +/// +/// BB0: +/// %vreg0 <sgpr> = SCALAR_INST +/// ... +/// BRANCH %cond BB1, BB2 +/// BB1: +/// %vreg2 <vgpr> = VECTOR_INST +/// %vreg3 <sgpr> = COPY %vreg2 <vgpr> +/// BB2: +/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1> +/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr> +/// +/// Now this code contains an illegal copy from a VGPR to an SGPR. +/// +/// In order to avoid this problem, this pass searches for PHI instructions +/// which define a <vsrc> register and constrains its definition class to +/// <vgpr> if the user of the PHI's definition register is a vector instruction. 
+/// If the PHI's definition class is constrained to <vgpr> then the coalescer +/// will be unable to perform the COPY removal from the above example which +/// ultimately led to the creation of an illegal COPY. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +class SIFixSGPRCopies : public MachineFunctionPass { + +private: + static char ID; + const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg) const; + +public: + SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "SI Fix SGPR copies"; + } + +}; + +} // End anonymous namespace + +char SIFixSGPRCopies::ID = 0; + +FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { + return new SIFixSGPRCopies(tm); +} + +/// This functions walks the use/def chains starting with the definition of +/// \p Reg until it finds an Instruction that isn't a COPY returns +/// the register class of that instruction. +const TargetRegisterClass *SIFixSGPRCopies::inferRegClass( + const TargetRegisterInfo *TRI, + const MachineRegisterInfo &MRI, + unsigned Reg) const { + // The Reg parameter to the function must always be defined by either a PHI + // or a COPY, therefore it cannot be a physical register. 
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Reg cannot be a physical register"); + + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), + E = MRI.use_end(); I != E; ++I) { + switch (I->getOpcode()) { + case AMDGPU::COPY: + RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI, + I->getOperand(0).getReg())); + break; + } + } + + return RC; +} + +bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (MI.getOpcode() != AMDGPU::PHI) { + continue; + } + unsigned Reg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg); + if (RC == &AMDGPU::VSrc_32RegClass) { + MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass); + } + } + } + return false; +} diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index a53e0b9..c64027f 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -32,7 +32,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM) { addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass); @@ -41,14 +41,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); - addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::f32, 
&AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass); + addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass); - addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass); - addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass); + addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); @@ -1042,20 +1042,6 @@ MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N, switch (N->getMachineOpcode()) { default: return N; - case AMDGPU::REG_SEQUENCE: { - // MVT::i128 only use SGPRs, so i128 REG_SEQUENCEs don't need to be - // rewritten. - if (N->getValueType(0) == MVT::i128) { - return N; - } - const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::VReg_64RegClassID, MVT::i32), - N->getOperand(1) , N->getOperand(2), - N->getOperand(3), N->getOperand(4) - }; - return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::i64, Ops); - } - case AMDGPU::S_LOAD_DWORD_IMM: NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64; // Fall-through |