aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/R600/SIInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SIInstructions.td')
-rw-r--r--lib/Target/R600/SIInstructions.td433
1 files changed, 318 insertions, 115 deletions
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 76f05eb..5232139 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -22,9 +22,16 @@ def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
}
+def SendMsgImm : Operand<i32> {
+ let PrintMethod = "printSendMsg";
+}
+
def isSI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+def isCI : Predicate<"Subtarget.getGeneration() "
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+
def WAIT_FLAG : InstFlag<"printWaitFlag">;
let Predicates = [isSI] in {
@@ -401,11 +408,25 @@ def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
+def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "DS_WRITE_B64", VReg_64>;
+
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
+def DS_READ_B64 : DS_Load_Helper <0x00000076, "DS_READ_B64", VReg_64>;
+
+// 2 forms.
+def DS_WRITE2_B32 : DS_Load2_Helper <0x0000000E, "DS_WRITE2_B32", VReg_64>;
+def DS_WRITE2_B64 : DS_Load2_Helper <0x0000004E, "DS_WRITE2_B64", VReg_128>;
+
+def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "DS_READ2_B32", VReg_64>;
+def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>;
+
+// TODO: DS_READ2ST64_B32, DS_READ2ST64_B64,
+// DS_WRITE2ST64_B32, DS_WRITE2ST64_B64
+
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
@@ -624,7 +645,18 @@ let neverHasSideEffects = 1, isMoveImm = 1 in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects = 1, isMoveImm = 1
-defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+let Uses = [EXEC] in {
+
+def V_READFIRSTLANE_B32 : VOP1 <
+ 0x00000002,
+ (outs SReg_32:$vdst),
+ (ins VReg_32:$src0),
+ "V_READFIRSTLANE_B32 $vdst, $src0",
+ []
+>;
+
+}
+
defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
[(set i32:$dst, (fp_to_sint f64:$src0))]
>;
@@ -826,17 +858,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
-} // End hasSideEffects
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+
+let Uses = [EXEC] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+ > {
+ let DisableEncoding = "$m0";
+ }
+} // End Uses = [EXEC]
+
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
@@ -864,20 +904,21 @@ def : Pat <
(EXTRACT_SUBREG $val, sub0)
>;
-//use two V_CNDMASK_B32_e64 instructions for f64
-def : Pat <
- (f64 (select i1:$src2, f64:$src1, f64:$src0)),
- (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub0),
- (EXTRACT_SUBREG $src1, sub0),
- $src2), sub0),
- (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub1),
- (EXTRACT_SUBREG $src1, sub1),
- $src2), sub1)
+def V_READLANE_B32 : VOP2 <
+ 0x00000001,
+ (outs SReg_32:$vdst),
+ (ins VReg_32:$src0, SSrc_32:$vsrc1),
+ "V_READLANE_B32 $vdst, $src0, $vsrc1",
+ []
>;
-defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
-defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+def V_WRITELANE_B32 : VOP2 <
+ 0x00000002,
+ (outs VReg_32:$vdst),
+ (ins SReg_32:$src0, SSrc_32:$vsrc1),
+ "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
+ []
+>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
@@ -924,51 +965,32 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
->;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
->;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
->;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
->;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
- [(set i32:$dst, (srl i32:$src0, i32:$src1))]
->;
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
- [(set i32:$dst, (sra i32:$src0, i32:$src1))]
->;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
- [(set i32:$dst, (shl i32:$src0, i32:$src1))]
->;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set i32:$dst, (and i32:$src0, i32:$src1))]
->;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set i32:$dst, (or i32:$src0, i32:$src1))]
->;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set i32:$dst, (xor i32:$src0, i32:$src1))]
->;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
@@ -979,14 +1001,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
+ "V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
+ "V_SUBB_U32">;
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
@@ -1033,9 +1057,16 @@ def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
-def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
-def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
-def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+
+let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
+ [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
+ [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
+}
+
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
+ [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
defm : BFIPatterns <V_BFI_B32>;
def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
@@ -1154,10 +1185,18 @@ def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
} // End Uses = [SCC]
} // End Defs = [SCC]
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
+ [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
+ [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
+ [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
+ [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
@@ -1167,7 +1206,9 @@ def S_CSELECT_B32 : SOP2 <
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]
+>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
@@ -1178,13 +1219,23 @@ def : Pat <
(S_AND_B64 $src0, $src1)
>;
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
+>;
+
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
+ [(set i64:$dst, (or i64:$src0, i64:$src1))]
+>;
+
def : Pat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
>;
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+>;
+
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
>;
@@ -1305,8 +1356,8 @@ def SI_END_CF : InstSI <
def SI_KILL : InstSI <
(outs),
- (ins VReg_32:$src),
- "SI_KIL $src",
+ (ins VSrc_32:$src),
+ "SI_KILL $src",
[(int_AMDGPU_kill f32:$src)]
>;
@@ -1315,13 +1366,13 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
+//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri, ADDRIndirect>;
let UseNamedOperandTable = 1 in {
def SI_RegisterLoad : AMDGPUShaderInst <
(outs VReg_32:$dst, SReg_64:$temp),
- (ins FRAMEri64:$addr, i32imm:$chan),
+ (ins FRAMEri32:$addr, i32imm:$chan),
"", []
> {
let isRegisterLoad = 1;
@@ -1330,7 +1381,7 @@ def SI_RegisterLoad : AMDGPUShaderInst <
class SIRegStore<dag outs> : AMDGPUShaderInst <
outs,
- (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
+ (ins VReg_32:$val, FRAMEri32:$addr, i32imm:$chan),
"", []
> {
let isRegisterStore = 1;
@@ -1397,13 +1448,13 @@ def : Pat<
def : Pat <
(int_AMDGPU_kilp),
- (SI_KILL (V_MOV_B32_e32 0xbf800000))
+ (SI_KILL 0xbf800000)
>;
/* int_SI_vs_load_input */
def : Pat<
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
/* int_SI_export */
@@ -1637,17 +1688,25 @@ def : BitConvert <f64, i64, VReg_64>;
def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
+def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
def : BitConvert <v4i32, i128, VReg_128>;
def : BitConvert <i128, v4i32, VReg_128>;
+def : BitConvert <v8f32, v8i32, SReg_256>;
+def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8i32, v32i8, SReg_256>;
def : BitConvert <v32i8, v8i32, SReg_256>;
def : BitConvert <v8i32, v32i8, VReg_256>;
+def : BitConvert <v8i32, v8f32, VReg_256>;
+def : BitConvert <v8f32, v8i32, VReg_256>;
def : BitConvert <v32i8, v8i32, VReg_256>;
+def : BitConvert <v16i32, v16f32, VReg_512>;
+def : BitConvert <v16f32, v16i32, VReg_512>;
+
/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/
@@ -1658,16 +1717,30 @@ def : Pat <
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
+/********** ================================ **********/
+/********** Floating point absolute/negative **********/
+/********** ================================ **********/
+
+// Manipulate the sign bit directly, as e.g. using the source negation modifier
+// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
+// breaking the piglit *s-floatBitsToInt-neg* tests
+
+// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
+// removing these patterns
+
+def : Pat <
+ (fneg (fabs f32:$src)),
+ (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+>;
+
def : Pat <
(fabs f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
>;
def : Pat <
(fneg f32:$src),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
>;
/********** ================== **********/
@@ -1794,10 +1867,18 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
+class Ext32Pat <SDNode ext> : Pat <
+ (i32 (ext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+>;
+
+def : Ext32Pat <zext>;
+def : Ext32Pat <anyext>;
+
// 1. Offset as 8bit DWORD immediate
def : Pat <
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
>;
// 2. Offset loaded in an 32bit SGPR
@@ -1809,7 +1890,7 @@ def : Pat <
// 3. Offset in an 32Bit VGPR
def : Pat <
(SIload_constant i128:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -1839,35 +1920,47 @@ def : Pat <
/********** Load/Store Patterns **********/
/********** ======================= **********/
-class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag i32:$src0),
- (vt (inst 0, $src0, $src0, $src0, 0, 0))
->;
+multiclass DSReadPat <DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
+ (inst (i1 0), $ptr, (as_i16imm $offset))
+ >;
-def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-def : DSReadPat <DS_READ_B32, i32, local_load>;
-def : Pat <
- (local_load i32:$src0),
- (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
->;
+ def : Pat <
+ (frag i32:$src0),
+ (vt (inst 0, $src0, 0))
+ >;
+}
-class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag i32:$src1, i32:$src0),
- (inst 0, $src0, $src1, $src1, 0, 0)
->;
+defm : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+defm : DSReadPat <DS_READ_B32, i32, local_load>;
+defm : DSReadPat <DS_READ_B64, i64, local_load>;
-def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ >;
+
+ def : Pat <
+ (frag vt:$src1, i32:$src0),
+ (inst 0, $src0, $src1, 0)
+ >;
+}
+
+defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
+defm : DSWritePat <DS_WRITE_B64, i64, local_store>;
def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
- (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
+ (DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
- (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
+ (DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
/********** ================== **********/
/********** SMRD Patterns **********/
@@ -1877,8 +1970,8 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
- (vt (Instr_IMM $sbase, IMM8bitDWORD:$offset))
+ (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
+ (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
>;
// 2. Offset loaded in an 32bit SGPR
@@ -1911,6 +2004,11 @@ defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag global_ld, PatFrag constant_ld> {
def : Pat <
+ (vt (global_ld (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset))),
+ (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
+ >;
+
+ def : Pat <
(vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))),
(Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
>;
@@ -1953,6 +2051,16 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
def : Pat <
+ (st vt:$value, (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset)),
+ (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
+ >;
+
+ def : Pat <
+ (st vt:$value, (add i64:$ptr, IMM12bit:$offset)),
+ (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
+ >;
+
+ def : Pat <
(st vt:$value, i64:$ptr),
(Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;
@@ -1970,6 +2078,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
+ MUBUF bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
@@ -1991,11 +2143,60 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
+let Predicates = [isCI] in {
+
+// Sea island new arithmetic instructinos
+let neverHasSideEffects = 1 in {
+defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
+ [(set f64:$dst, (ftrunc f64:$src0))]
+>;
+defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
+ [(set f64:$dst, (fceil f64:$src0))]
+>;
+defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
+ [(set f64:$dst, (ffloor f64:$src0))]
+>;
+
+defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>;
+
+def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
+def V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
+def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
+def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
+
+// XXX - Does this set VCC?
+def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
+} // End neverHasSideEffects = 1
+
+// Remaining instructions:
+// FLAT_*
+// S_CBRANCH_CDBGUSER
+// S_CBRANCH_CDBGSYS
+// S_CBRANCH_CDBGSYS_OR_USER
+// S_CBRANCH_CDBGSYS_AND_USER
+// S_DCACHE_INV_VOL
+// V_EXP_LEGACY_F32
+// V_LOG_LEGACY_F32
+// DS_NOP
+// DS_GWS_SEMA_RELEASE_ALL
+// DS_WRAP_RTN_B32
+// DS_CNDXCHG32_RTN_B64
+// DS_WRITE_B96
+// DS_WRITE_B128
+// DS_CONDXCHG32_RTN_B128
+// DS_READ_B96
+// DS_READ_B128
+// BUFFER_LOAD_DWORDX3
+// BUFFER_STORE_DWORDX3
+
+} // End Predicates = [isCI]
+
+
/********** ====================== **********/
/********** Indirect adressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
// 1. Extract with offset
def : Pat<
@@ -2011,21 +2212,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
// 3. Insert with offset
def : Pat<
- (vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)),
+ (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
(IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
- (vector_insert vt:$vec, f32:$val, i32:$idx),
+ (vector_insert vt:$vec, eltvt:$val, i32:$idx),
(IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+
+defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
/********** =============== **********/
/********** Conditions **********/
@@ -2057,6 +2263,11 @@ def : Pat <
(EXTRACT_SUBREG $a, sub0)
>;
+def : Pat <
+ (i1 (trunc i32:$a)),
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+>;
+
// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
@@ -2064,14 +2275,6 @@ def : Pat <
(S_ADD_I32 $src0, $src1)
>;
-def : Pat <
- (or i64:$a, i64:$b),
- (INSERT_SUBREG
- (INSERT_SUBREG (IMPLICIT_DEF),
- (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub0), (EXTRACT_SUBREG $b, sub0)), sub0),
- (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub1), (EXTRACT_SUBREG $b, sub1)), sub1)
->;
-
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//