author    Tom Stellard <thomas.stellard@amd.com>  2013-08-14 23:24:32 +0000
committer Tom Stellard <thomas.stellard@amd.com>  2013-08-14 23:24:32 +0000
commit    38d5e1c36d954f1ff6489f58efd1d4865217cf9b (patch)
tree      451454dd8bf6ea5ec2f3ea021da2c7f6de4a928a /lib/Target/R600/AMDGPUISelDAGToDAG.cpp
parent    636298ba64fd07d4ddcae6005e7fc1db43eb5335 (diff)
R600/SI: Lower BUILD_VECTOR to REG_SEQUENCE v2
Using REG_SEQUENCE for BUILD_VECTOR rather than a series of INSERT_SUBREG
instructions should make it easier for the register allocator to coalesce
unnecessary copies.

v2:
  - Use an SGPR register class if all the operands of BUILD_VECTOR are SGPRs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188427 91177308-0d34-0410-b5e6-96231b3b80d8
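For context on the node shape involved: a REG_SEQUENCE node takes a target
register-class ID followed by one (value, subregister-index) pair per element,
so the whole vector is assembled by a single node instead of a chain of
IMPLICIT_DEF + INSERT_SUBREG steps. Below is a minimal standalone sketch of
that operand layout; the integer IDs are hypothetical stand-ins for the
SDValues and the AMDGPU::SReg_128RegClassID / AMDGPU::sub0..sub3 constants, so
the snippet compiles outside the LLVM tree.

    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical stand-ins for the four BUILD_VECTOR operands
      // (SDValues in the real backend).
      const int Elems[] = {100, 101, 102, 103};
      const int NumElts = 4;

      // Hypothetical stand-ins for AMDGPU::SReg_128RegClassID and
      // AMDGPU::sub0..sub3.
      const int RegClassID = 1;
      const int SubRegs[] = {10, 11, 12, 13};

      // REG_SEQUENCE operand list: register class first, then one
      // (value, subregister-index) pair per vector element.
      std::vector<int> RegSeqArgs;
      RegSeqArgs.push_back(RegClassID);
      for (int i = 0; i < NumElts; ++i) {
        RegSeqArgs.push_back(Elems[i]);   // element value...
        RegSeqArgs.push_back(SubRegs[i]); // ...placed into this subregister
      }

      // 2 * NumElts + 1 operands, matching the array sizing in the patch.
      printf("%zu operands\n", RegSeqArgs.size());
      return 0;
    }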
Diffstat (limited to 'lib/Target/R600/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 88
1 file changed, 69 insertions(+), 19 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index d339b09..22bdb90 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -285,35 +285,85 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
case ISD::BUILD_VECTOR: {
+ unsigned RegClassID;
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- break;
+ const AMDGPURegisterInfo *TRI =
+ static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ bool UseVReg = true;
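+      // Inspect each use of this BUILD_VECTOR: if any machine-node user
+      // expects its operand in an SGPR register class, build the vector in
+      // SGPRs so the result can feed those users without a cross-bank copy.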
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (!U->isMachineOpcode()) {
+ continue;
+ }
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (!RC) {
+ continue;
+ }
+ if (SIRI->isSGPRClass(RC)) {
+ UseVReg = false;
+ }
+ }
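+      // Pick the SGPR or VGPR register class wide enough for NumVectorElts
+      // 32-bit elements (the element type is asserted to be i32 above).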
+ switch(NumVectorElts) {
+ case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
+ AMDGPU::SReg_32RegClassID;
+ break;
+ case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
+ AMDGPU::SReg_64RegClassID;
+ break;
+ case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
+ AMDGPU::SReg_128RegClassID;
+ break;
+ case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
+ AMDGPU::SReg_256RegClassID;
+ break;
+ case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
+ AMDGPU::SReg_512RegClassID;
+ break;
+ }
+ } else {
+      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+      // sequence, which adds a 128-bit register copy when going through the
+      // TwoAddressInstruction pass. We want to avoid 128-bit copies as much
+      // as possible because they can't be bundled by our scheduler.
+ switch(NumVectorElts) {
+ case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
}
- unsigned RegClassID;
- switch(N->getValueType(0).getVectorNumElements()) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+
+ if (NumVectorElts == 1) {
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
+ VT.getVectorElementType(),
+ N->getOperand(0), RegClass);
}
- // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- SDValue RegSeqArgs[9] = {
- CurDAG->getTargetConstant(RegClassID, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
- };
+
+ assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ "supported yet");
+ // 16 = Max Num Vector Elements
+ // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+ // 1 = Vector Register Class
+ SDValue RegSeqArgs[16 * 2 + 1];
+
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ // XXX: Why is this here?
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
- RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
}
if (!IsRegSeq)
break;
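The hunk shown here cuts off after the IsRegSeq check. Once RegSeqArgs is
fully populated, the case plausibly concludes by emitting the REG_SEQUENCE
machine node along the following lines; this is a sketch under that
assumption, not text from the diff above, and the exact SelectNodeTo overload
and operand-count argument depend on this LLVM revision.

    // Sketch: create the REG_SEQUENCE machine node from the operand array.
    // One register-class ID plus a (value, subreg-index) pair per element
    // gives 2 * N->getNumOperands() + 1 operands in total.
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs, 2 * N->getNumOperands() + 1);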