aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Konig <christian.koenig@amd.com>2013-02-26 17:52:03 +0000
committerChristian Konig <christian.koenig@amd.com>2013-02-26 17:52:03 +0000
commit6fd49bc89adb515ae389f34f18b3c52c13b579fa (patch)
treee135a5aa1b49327b35f6764051e5891dd6ab5690
parenta25b8d4c0b4f6422c58c0ca5c003757bfbf636ac (diff)
downloadexternal_llvm-6fd49bc89adb515ae389f34f18b3c52c13b579fa.zip
external_llvm-6fd49bc89adb515ae389f34f18b3c52c13b579fa.tar.gz
external_llvm-6fd49bc89adb515ae389f34f18b3c52c13b579fa.tar.bz2
R600/SI: fix and cleanup SI register definition v2
Prevent producing real strange tablegen code by using proper register sizes, alignments and hierarchy. Also cleanup the unused definitions and add some comments. v2: add SGPR 512 bit registers, stop registers from wrapping around, fix SGPR alignment This is a candidate for the mesa-stable branch. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176098 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/SIInstructions.td5
-rw-r--r--lib/Target/R600/SIRegisterInfo.td227
2 files changed, 135 insertions, 97 deletions
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 907cf49..9701d19 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+ [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+ SReg_32:$src0, SReg_32:$src1))]
>;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
// f32 pattern for S_CSELECT_B32
def : Pat <
- (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+ (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
(S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
>;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index ab36b87..9e04e24 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -1,30 +1,40 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
}
-class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [sub0, sub1];
- let HWEncoding = encoding;
-}
-
-class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
-
-class VGPR_32 <bits<16> num, string name> : SIReg<name, num> {
- let HWEncoding{8} = 1;
-}
-
// Special Registers
def VCC : SIReg<"VCC", 106>;
-def EXEC_LO : SIReg <"EXEC LO", 126>;
-def EXEC_HI : SIReg <"EXEC HI", 127>;
-def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
+def EXEC : SIReg<"EXEC", 126>;
def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>;
-//Interpolation registers
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+// virtual Interpolation registers
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
@@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-// SGPR 32-bit registers
-foreach Index = 0-101 in {
- def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
-}
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
def SGPR_64 : RegisterTuples<[sub0, sub1],
- [(add (decimate SGPR_32, 2)),
- (add(decimate (rotl SGPR_32, 1), 2))]>;
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add (decimate SGPR_32, 4)),
- (add (decimate (rotl SGPR_32, 1), 4)),
- (add (decimate (rotl SGPR_32, 2), 4)),
- (add (decimate (rotl SGPR_32, 3), 4))]>;
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (decimate SGPR_32, 8)),
- (add (decimate (rotl SGPR_32, 1), 8)),
- (add (decimate (rotl SGPR_32, 2), 8)),
- (add (decimate (rotl SGPR_32, 3), 8)),
- (add (decimate (rotl SGPR_32, 4), 8)),
- (add (decimate (rotl SGPR_32, 5), 8)),
- (add (decimate (rotl SGPR_32, 6), 8)),
- (add (decimate (rotl SGPR_32, 7), 8))]>;
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-foreach Index = 0-255 in {
- def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
-}
-
def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[sub0, sub1],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1))]>;
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1)),
- (add (rotl VGPR_32, 2)),
- (add (rotl VGPR_32, 3))]>;
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1)),
- (add (rotl VGPR_32, 2)),
- (add (rotl VGPR_32, 3)),
- (add (rotl VGPR_32, 4)),
- (add (rotl VGPR_32, 5)),
- (add (rotl VGPR_32, 6)),
- (add (rotl VGPR_32, 7))]>;
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1)),
- (add (rotl VGPR_32, 2)),
- (add (rotl VGPR_32, 3)),
- (add (rotl VGPR_32, 4)),
- (add (rotl VGPR_32, 5)),
- (add (rotl VGPR_32, 6)),
- (add (rotl VGPR_32, 7)),
- (add (rotl VGPR_32, 8)),
- (add (rotl VGPR_32, 9)),
- (add (rotl VGPR_32, 10)),
- (add (rotl VGPR_32, 11)),
- (add (rotl VGPR_32, 12)),
- (add (rotl VGPR_32, 13)),
- (add (rotl VGPR_32, 14)),
- (add (rotl VGPR_32, 15))]>;
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0, EXEC_LO, EXEC_HI)
+ (add SGPR_32, M0Reg)
>;
-def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>;
+def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg)
+>;
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
+
// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
- (add VGPR_32,
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+
+//===----------------------------------------------------------------------===//
+// [SV]Src_* register classes, can have either an immediate or an register
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add VReg_32, SReg_32,
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J,
@@ -162,29 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32,
ANCILLARY,
SAMPLE_COVERAGE,
POS_FIXED_PT
- )
+ )
>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
-
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
-
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
-
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
-
-// [SV]Src_* operands can have either an immediate or an register
-def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
-
-def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>;
-
-def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
-
-def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>;
-
-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
-def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;