From 6fd49bc89adb515ae389f34f18b3c52c13b579fa Mon Sep 17 00:00:00 2001 From: Christian Konig Date: Tue, 26 Feb 2013 17:52:03 +0000 Subject: R600/SI: fix and cleanup SI register definition v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent producing really strange tablegen code by using proper register sizes, alignments and hierarchy. Also clean up the unused definitions and add some comments. v2: add SGPR 512 bit registers, stop registers from wrapping around, fix SGPR alignment This is a candidate for the mesa-stable branch. Signed-off-by: Christian König Reviewed-by: Tom Stellard git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176098 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstructions.td | 5 +- lib/Target/R600/SIRegisterInfo.td | 227 ++++++++++++++++++++++---------------- 2 files changed, 135 insertions(+), 97 deletions(-) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 907cf49..9701d19 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -918,14 +918,15 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; def S_CSELECT_B32 : SOP2 < 0x0000000a, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] + [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc), + SReg_32:$src0, SReg_32:$src1))] >; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; // f32 pattern for S_CSELECT_B32 def : Pat < - (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), + (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)), (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) >; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index ab36b87..9e04e24 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -1,30 +1,40 @@ +//===-- 
SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the SI registers +//===----------------------------------------------------------------------===// class SIReg encoding = 0> : Register { let Namespace = "AMDGPU"; let HWEncoding = encoding; } -class SI_64 subregs, bits<16> encoding> : RegisterWithSubRegs { - let Namespace = "AMDGPU"; - let SubRegIndices = [sub0, sub1]; - let HWEncoding = encoding; -} - -class SGPR_32 num, string name> : SIReg; - -class VGPR_32 num, string name> : SIReg { - let HWEncoding{8} = 1; -} - // Special Registers def VCC : SIReg<"VCC", 106>; -def EXEC_LO : SIReg <"EXEC LO", 126>; -def EXEC_HI : SIReg <"EXEC HI", 127>; -def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; +def EXEC : SIReg<"EXEC", 126>; def SCC : SIReg<"SCC", 253>; def M0 : SIReg <"M0", 124>; -//Interpolation registers +// SGPR registers +foreach Index = 0-101 in { + def SGPR#Index : SIReg <"SGPR"#Index, Index>; +} + +// VGPR registers +foreach Index = 0-255 in { + def VGPR#Index : SIReg <"VGPR"#Index, Index> { + let HWEncoding{8} = 1; + } +} + +// virtual Interpolation registers def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; @@ -50,102 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">; def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; -// SGPR 32-bit registers -foreach Index = 0-101 in { - def SGPR#Index : SGPR_32 ; -} +//===----------------------------------------------------------------------===// +// Groupings using register classes and tuples 
+//===----------------------------------------------------------------------===// +// SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers def SGPR_64 : RegisterTuples<[sub0, sub1], - [(add (decimate SGPR_32, 2)), - (add(decimate (rotl SGPR_32, 1), 2))]>; + [(add (decimate (trunc SGPR_32, 101), 2)), + (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], - [(add (decimate SGPR_32, 4)), - (add (decimate (rotl SGPR_32, 1), 4)), - (add (decimate (rotl SGPR_32, 2), 4)), - (add (decimate (rotl SGPR_32, 3), 4))]>; + [(add (decimate (trunc SGPR_32, 99), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4))]>; // SGPR 256-bit registers def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], - [(add (decimate SGPR_32, 8)), - (add (decimate (rotl SGPR_32, 1), 8)), - (add (decimate (rotl SGPR_32, 2), 8)), - (add (decimate (rotl SGPR_32, 3), 8)), - (add (decimate (rotl SGPR_32, 4), 8)), - (add (decimate (rotl SGPR_32, 5), 8)), - (add (decimate (rotl SGPR_32, 6), 8)), - (add (decimate (rotl SGPR_32, 7), 8))]>; + [(add (decimate (trunc SGPR_32, 95), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + (add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4))]>; + +// SGPR 512-bit registers +def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], + [(add (decimate (trunc SGPR_32, 87), 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4)), + (add (decimate (shl SGPR_32, 5), 4)), + 
(add (decimate (shl SGPR_32, 6), 4)), + (add (decimate (shl SGPR_32, 7), 4)), + (add (decimate (shl SGPR_32, 8), 4)), + (add (decimate (shl SGPR_32, 9), 4)), + (add (decimate (shl SGPR_32, 10), 4)), + (add (decimate (shl SGPR_32, 11), 4)), + (add (decimate (shl SGPR_32, 12), 4)), + (add (decimate (shl SGPR_32, 13), 4)), + (add (decimate (shl SGPR_32, 14), 4)), + (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -foreach Index = 0-255 in { - def VGPR#Index : VGPR_32 ; -} - def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers def VGPR_64 : RegisterTuples<[sub0, sub1], - [(add VGPR_32), - (add (rotl VGPR_32, 1))]>; + [(add (trunc VGPR_32, 255)), + (add (shl VGPR_32, 1))]>; // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3))]>; + [(add (trunc VGPR_32, 253)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3))]>; // VGPR 256-bit registers def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3)), - (add (rotl VGPR_32, 4)), - (add (rotl VGPR_32, 5)), - (add (rotl VGPR_32, 6)), - (add (rotl VGPR_32, 7))]>; + [(add (trunc VGPR_32, 249)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7))]>; // VGPR 512-bit registers def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], - [(add VGPR_32), - (add (rotl VGPR_32, 1)), - (add (rotl VGPR_32, 2)), - (add (rotl VGPR_32, 3)), - (add (rotl VGPR_32, 4)), - (add (rotl VGPR_32, 5)), - (add (rotl VGPR_32, 6)), - (add (rotl VGPR_32, 7)), - (add (rotl VGPR_32, 8)), - (add (rotl VGPR_32, 
9)), - (add (rotl VGPR_32, 10)), - (add (rotl VGPR_32, 11)), - (add (rotl VGPR_32, 12)), - (add (rotl VGPR_32, 13)), - (add (rotl VGPR_32, 14)), - (add (rotl VGPR_32, 15))]>; + [(add (trunc VGPR_32, 241)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4)), + (add (shl VGPR_32, 5)), + (add (shl VGPR_32, 6)), + (add (shl VGPR_32, 7)), + (add (shl VGPR_32, 8)), + (add (shl VGPR_32, 9)), + (add (shl VGPR_32, 10)), + (add (shl VGPR_32, 11)), + (add (shl VGPR_32, 12)), + (add (shl VGPR_32, 13)), + (add (shl VGPR_32, 14)), + (add (shl VGPR_32, 15))]>; + +//===----------------------------------------------------------------------===// +// Register classes used as source and destination +//===----------------------------------------------------------------------===// + +// Special register classes for predicates and the M0 register +def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>; +def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>; +def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; +def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, - (add SGPR_32, M0, EXEC_LO, EXEC_HI) + (add SGPR_32, M0Reg) >; -def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>; +def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, + (add SGPR_64, VCCReg, EXECReg) +>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; +def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>; + // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, - (add VGPR_32, +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; + +def VReg_64 : RegisterClass<"AMDGPU", 
[i64, v2i32], 64, (add VGPR_64)>; + +def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; + +def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; + +def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; + +//===----------------------------------------------------------------------===// +// [SV]Src_* register classes, can have either an immediate or a register +//===----------------------------------------------------------------------===// + +def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; + +def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; + +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, + (add VReg_32, SReg_32, PERSP_SAMPLE_I, PERSP_SAMPLE_J, PERSP_CENTER_I, PERSP_CENTER_J, PERSP_CENTROID_I, PERSP_CENTROID_J, @@ -162,29 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, ANCILLARY, SAMPLE_COVERAGE, POS_FIXED_PT - ) + ) >; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; - -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; - -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; - -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; - -// [SV]Src_* operands can have either an immediate or an register -def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; - -def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>; - -def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; - -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64, VReg_64)>; - -// Special register classes for predicates and the M0 register -def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; -def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; -def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; -def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add 
VReg_64, SReg_64)>; -- cgit v1.1