diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:24:32 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:24:32 +0000 |
commit | 38d5e1c36d954f1ff6489f58efd1d4865217cf9b (patch) | |
tree | 451454dd8bf6ea5ec2f3ea021da2c7f6de4a928a /lib/Target/R600/AMDGPUISelDAGToDAG.cpp | |
parent | 636298ba64fd07d4ddcae6005e7fc1db43eb5335 (diff) | |
download | external_llvm-38d5e1c36d954f1ff6489f58efd1d4865217cf9b.zip external_llvm-38d5e1c36d954f1ff6489f58efd1d4865217cf9b.tar.gz external_llvm-38d5e1c36d954f1ff6489f58efd1d4865217cf9b.tar.bz2 |
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2
Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG
instructions should make it easier for the register allocator to coalasce
unnecessary copies.
v2:
- Use an SGPR register class if all the operands of BUILD_VECTOR are
SGPRs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188427 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/R600/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 88 |
1 files changed, 69 insertions, 19 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index d339b09..22bdb90 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -285,35 +285,85 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } case ISD::BUILD_VECTOR: { + unsigned RegClassID; const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); - if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - break; + const AMDGPURegisterInfo *TRI = + static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo()); + const SIRegisterInfo *SIRI = + static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); + EVT VT = N->getValueType(0); + unsigned NumVectorElts = VT.getVectorNumElements(); + assert(VT.getVectorElementType().bitsEq(MVT::i32)); + if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + bool UseVReg = true; + for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); + U != E; ++U) { + if (!U->isMachineOpcode()) { + continue; + } + const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); + if (!RC) { + continue; + } + if (SIRI->isSGPRClass(RC)) { + UseVReg = false; + } + } + switch(NumVectorElts) { + case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID : + AMDGPU::SReg_32RegClassID; + break; + case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : + AMDGPU::SReg_64RegClassID; + break; + case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : + AMDGPU::SReg_128RegClassID; + break; + case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : + AMDGPU::SReg_256RegClassID; + break; + case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : + AMDGPU::SReg_512RegClassID; + break; + } + } else { + // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. + switch(NumVectorElts) { + case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; + case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + } } - unsigned RegClassID; - switch(N->getValueType(0).getVectorNumElements()) { - case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); + SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); + + if (NumVectorElts == 1) { + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, + VT.getVectorElementType(), + N->getOperand(0), RegClass); } - // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG - // that adds a 128 bits reg copy when going through TwoAddressInstructions - // pass. We want to avoid 128 bits copies as much as possible because they - // can't be bundled by our scheduler. - SDValue RegSeqArgs[9] = { - CurDAG->getTargetConstant(RegClassID, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32), - SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32) - }; + + assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " + "supported yet"); + // 16 = Max Num Vector Elements + // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) + // 1 = Vector Register Class + SDValue RegSeqArgs[16 * 2 + 1]; + + RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; for (unsigned i = 0; i < N->getNumOperands(); i++) { + // XXX: Why is this here? if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } - RegSeqArgs[2 * i + 1] = N->getOperand(i); + RegSeqArgs[1 + (2 * i)] = N->getOperand(i); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } if (!IsRegSeq) break; |