aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/R600/SIISelLowering.cpp88
-rw-r--r--lib/Target/R600/SIISelLowering.h2
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp21
-rw-r--r--lib/Target/R600/SIRegisterInfo.h4
-rw-r--r--test/CodeGen/R600/bfi_int.ll2
-rw-r--r--test/CodeGen/R600/llvm.SI.imageload.ll20
-rw-r--r--test/CodeGen/R600/llvm.SI.sample.ll32
-rw-r--r--test/CodeGen/R600/llvm.SI.sampled.ll32
-rw-r--r--test/CodeGen/R600/lshl.ll2
-rw-r--r--test/CodeGen/R600/lshr.ll2
10 files changed, 128 insertions, 77 deletions
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0be0d03..a53e0b9 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -673,43 +673,67 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
return false;
}
+const TargetRegisterClass *SITargetLowering::getRegClassForNode(
+ SelectionDAG &DAG, const SDValue &Op) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+ if (!Op->isMachineOpcode()) {
+ switch(Op->getOpcode()) {
+ case ISD::CopyFromReg: {
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return MRI.getRegClass(Reg);
+ }
+ return TRI.getPhysRegClass(Reg);
+ }
+ default: return NULL;
+ }
+ }
+ const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
+ int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+ if (OpClassID != -1) {
+ return TRI.getRegClass(OpClassID);
+ }
+ switch(Op.getMachineOpcode()) {
+ case AMDGPU::COPY_TO_REGCLASS:
+ // Operand 1 is the register class id for COPY_TO_REGCLASS instructions.
+ OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+
+ // If the COPY_TO_REGCLASS instruction is copying to a VSrc register
+ // class, then the register class for the value could be either a
+ // VReg or and SReg. In order to get a more accurate
+ if (OpClassID == AMDGPU::VSrc_32RegClassID ||
+ OpClassID == AMDGPU::VSrc_64RegClassID) {
+ return getRegClassForNode(DAG, Op.getOperand(0));
+ }
+ return TRI.getRegClass(OpClassID);
+ case AMDGPU::EXTRACT_SUBREG: {
+ int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const TargetRegisterClass *SuperClass =
+ getRegClassForNode(DAG, Op.getOperand(0));
+ return TRI.getSubClassWithSubReg(SuperClass, SubIdx);
+ }
+ case AMDGPU::REG_SEQUENCE:
+ // Operand 0 is the register class id for REG_SEQUENCE instructions.
+ return TRI.getRegClass(
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue());
+ default:
+ return getRegClassFor(Op.getSimpleValueType());
+ }
+}
+
/// \brief Does "Op" fit into register class "RegClass" ?
bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const {
-
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- SDNode *Node = Op.getNode();
-
- const TargetRegisterClass *OpClass;
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
- int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
- if (OpClassID == -1) {
- switch (MN->getMachineOpcode()) {
- case AMDGPU::REG_SEQUENCE:
- // Operand 0 is the register class id for REG_SEQUENCE instructions.
- OpClass = TRI->getRegClass(
- cast<ConstantSDNode>(MN->getOperand(0))->getZExtValue());
- break;
- default:
- OpClass = getRegClassFor(Op.getSimpleValueType());
- break;
- }
- } else {
- OpClass = TRI->getRegClass(OpClassID);
- }
-
- } else if (Node->getOpcode() == ISD::CopyFromReg) {
- RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
- OpClass = MRI.getRegClass(Reg->getReg());
-
- } else
+ const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
+ if (!RC) {
return false;
-
- return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
+ }
+ return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}
/// \brief Make sure that we don't exeed the number of allowed scalars
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 08c1d17..b4202c4 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -30,6 +30,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
+ const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
+ const SDValue &Op) const;
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const;
void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index ddfc54e..50fd4c7 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -49,3 +49,24 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
case MVT::i32: return &AMDGPU::VReg_32RegClass;
}
}
+
+const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+ const TargetRegisterClass *BaseClasses[] = {
+ &AMDGPU::VReg_32RegClass,
+ &AMDGPU::SReg_32RegClass,
+ &AMDGPU::VReg_64RegClass,
+ &AMDGPU::SReg_64RegClass,
+ &AMDGPU::SReg_128RegClass,
+ &AMDGPU::SReg_256RegClass
+ };
+
+ for (unsigned i = 0, e = sizeof(BaseClasses) /
+ sizeof(const TargetRegisterClass*); i != e; ++i) {
+ if (BaseClasses[i]->contains(Reg)) {
+ return BaseClasses[i];
+ }
+ }
+ return NULL;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index c322f94..d0df4f9 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -41,6 +41,10 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// \brief Return the 'base' register class for this register.
+ /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
+ const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
};
} // End namespace llvm
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
index 501c556..cdccdfa 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -38,7 +38,7 @@ entry:
; R600-CHECK: @bfi_sha256_ma
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
+; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, VGPR[0-9]+}}
; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
index 6b321f0..0adcdfc 100644
--- a/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -1,15 +1,15 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 2, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 1, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 4, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0
-;CHECK: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, -1
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15, 0, 0, -1
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 3, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 2, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 1, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 4, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 5, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+}}, 12, 0, 0, -1
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{VGPR[0-9]+}}, 8, 0, 0, -1
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index de06354..7655996 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK-DAG: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK-DAG: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll
index 71b8ef5..3b05551 100644
--- a/test/CodeGen/R600/llvm.SI.sampled.ll
+++ b/test/CodeGen/R600/llvm.SI.sampled.ll
@@ -1,21 +1,21 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 2
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 1
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 4
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK-DAG: IMAGE_SAMPLE_D {{VGPR[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index 9e29b0d..806e681 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
-;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]+}}, {{[SV]GPR[0-9]+}}, 1
+;CHECK: V_LSHL_B32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 1
define void @test(i32 %p) {
%i = mul i32 %p, 2
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index eab3fbf..cfbcc34 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
-;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]+}}, {{[SV]GPR[0-9]+}}, 1
+;CHECK: V_LSHR_B32_e64 {{VGPR[0-9]}}, SGPR{{[0-9]}}, 1
define void @test(i32 %p) {
%i = udiv i32 %p, 2