diff options
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 342 |
1 files changed, 179 insertions, 163 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7e61b18..0346e24 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; -def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>; def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), @@ -230,7 +229,7 @@ def TEX_RECT : PatLeaf< def TEX_ARRAY : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return TType == 9 || TType == 10 || TType == 15 || TType == 16; + return TType == 9 || TType == 10 || TType == 16; }] >; @@ -241,12 +240,26 @@ def TEX_SHADOW_ARRAY : PatLeaf< }] >; -class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs, - dag ins, string asm, list<dag> pattern> : +def TEX_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 14; + }] +>; + +def TEX_ARRAY_MSAA : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return TType == 15; + }] +>; + +class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, + dag outs, dag ins, string asm, list<dag> pattern> : InstR600ISA <outs, ins, asm, pattern>, CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF { - let rat_id = 0; + let rat_id = ratid; let rat_inst = ratinst; let rim = 0; // XXX: Have a separate instruction for non-indexed writes. @@ -264,6 +277,7 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs, let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } @@ -403,7 +417,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", - []>; + [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -537,6 +551,7 @@ class ExportSwzInst : InstR600ISA<( let elem_size = 3; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } } // End usesCustomInserter = 1 @@ -550,6 +565,7 @@ class ExportBufInst : InstR600ISA<( let elem_size = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; + let IsExport = 1; } //===----------------------------------------------------------------------===// @@ -573,6 +589,7 @@ i32imm:$COUNT, i32imm:$Enabled), let ALT_CONST = 0; let WHOLE_QUAD_MODE = 0; let BARRIER = 1; + let UseNamedOperandTable = 1; let Inst{31-0} = Word0; let Inst{63-32} = Word1; @@ -672,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. def SETE : R600_2OP < 0x08, "SETE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))] >; def SGT : R600_2OP < 0x09, "SETGT", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))] >; def SNE : R600_2OP < 0xB, "SETNE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))] >; def SETE_DX10 : R600_2OP < 0xC, "SETE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))] >; def SETGT_DX10 : R600_2OP < 0xD, "SETGT_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))] >; def SETGE_DX10 : R600_2OP < 0xE, "SETGE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))] >; def SETNE_DX10 : R600_2OP < 0xF, "SETNE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))] >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; @@ -805,12 +822,12 @@ def CNDE_INT : R600_3OP < def CNDGE_INT : R600_3OP < 0x1E, "CNDGE_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))] >; def CNDGT_INT : R600_3OP < 0x1D, "CNDGT_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))] >; //===----------------------------------------------------------------------===// @@ -863,6 +880,9 @@ def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; def TEX_LD : R600_TEX <0x03, "TEX_LD">; +def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> { + let INST_MOD = 1; +} def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; @@ -881,6 +901,7 @@ defm : TexPattern<6, TEX_LD, v4i32>; defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; defm : TexPattern<8, TEX_GET_GRADIENTS_H>; defm : TexPattern<9, TEX_GET_GRADIENTS_V>; +defm : TexPattern<10, TEX_LDPTR, v4i32>; //===----------------------------------------------------------------------===// // Helper classes for common instructions @@ -903,18 +924,22 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] >; class CNDGT_Common <bits<5> inst> : R600_3OP < inst, "CNDGT", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))] ->; + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))] +> { + let Itinerary = VecALU; +} class CNDGE_Common <bits<5> inst> : R600_3OP < inst, "CNDGE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] ->; + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))] +> { + let Itinerary = VecALU; +} let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { @@ -984,35 +1009,30 @@ multiclass CUBE_Common <bits<11> inst> { class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "EXP_IEEE", fexp2 > { - let TransOnly = 1; let Itinerary = TransALU; } class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_INT", fp_to_sint > { - let TransOnly = 1; let Itinerary = TransALU; } class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "INT_TO_FLT", sint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_UINT", fp_to_uint > { - let TransOnly = 1; let Itinerary = TransALU; } class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "UINT_TO_FLT", uint_to_fp > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1023,7 +1043,6 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "LOG_IEEE", flog2 > { - let TransOnly = 1; let Itinerary = TransALU; } @@ -1033,75 +1052,68 @@ class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI_INT", mulhs > { - let TransOnly = 1; let Itinerary = TransALU; } class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI", mulhu > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULLO_INT", mul > { - let TransOnly = 1; let Itinerary = TransALU; } class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIP_UINT", AMDGPUurecip > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq > { - let TransOnly = 1; let Itinerary = TransALU; } class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] > { - let TransOnly = 1; let Itinerary = TransALU; } class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } class COS_Common <bits<11> inst> : R600_1OP < inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> { let Trig = 1; - let TransOnly = 1; let Itinerary = TransALU; } +def CLAMP_R600 : CLAMP <R600_Reg32>; +def FABS_R600 : FABS<R600_Reg32>; +def FNEG_R600 : FNEG<R600_Reg32>; + //===----------------------------------------------------------------------===// // Helper patterns for complex intrinsics //===----------------------------------------------------------------------===// @@ -1124,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x)) >; +// FROUND pattern +class FROUNDPat<Instruction CNDGE> : Pat < + (AMDGPUround f32:$x), + (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)) +>; + + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1165,11 +1184,12 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + def : FROUNDPat <CNDGE_r600>; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } @@ -1178,7 +1198,7 @@ let Predicates = [isR600] in { def R600_ExportBuf : ExportBufInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } @@ -1247,6 +1267,33 @@ let Predicates = [isR700] in { } //===----------------------------------------------------------------------===// +// Evergreen / Cayman store instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEGorCayman] in { + +class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins, + string name, list<dag> pattern> + : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins, + "MEM_RAT_CACHELESS "#name, pattern>; + +class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name, + list<dag> pattern> + : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, + "MEM_RAT "#name, pattern>; + +def RAT_MSKOR : CF_MEM_RAT <0x11, 0, + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), + "MSKOR $rw_gpr.XW, $index_gpr", + [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)] +> { + let eop = 0; +} + +} // End Predicates = [isEGorCayman] + + +//===----------------------------------------------------------------------===// // Evergreen Only instructions //===----------------------------------------------------------------------===// @@ -1274,36 +1321,32 @@ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name, - list<dag> pattern> - : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> { -} - -} // End usesCustomInserter = 1 +let usesCustomInserter = 1 in { // 32-bit store -def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1, (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop", + "STORE_RAW $rw_gpr, $index_gpr, $eop", [(global_store i32:$rw_gpr, i32:$index_gpr)] >; // 64-bit store -def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3, (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop", + "STORE_RAW $rw_gpr.XY, $index_gpr, $eop", [(global_store v2i32:$rw_gpr, i32:$index_gpr)] >; //128-bit store -def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < +def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop", + "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop", [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +} // End usesCustomInserter = 1 + class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> { @@ -1508,7 +1551,6 @@ let hasSideEffects = 1 in { def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { let Pattern = []; - let TransOnly = 0; let Itinerary = AnyALU; } @@ -1600,29 +1642,83 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS < let DisableEncoding = "$dst"; } -class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> : +class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern, + string dst =""> : R600_LDS < - lds_op, - (outs), + lds_op, outs, (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), - " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel", + " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel", pattern > { + field string BaseOp; + let src2 = 0; let src2_rel = 0; let LDS_1A1D = 1; } +class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS_1A1D <lds_op, (outs), name, pattern> { + let BaseOp = name; +} + +class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> { + + let BaseOp = name; + let usesCustomInserter = 1; + let DisableEncoding = "$dst"; + let Defs = [OQAP]; +} + +class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> : + R600_LDS < + lds_op, + (outs), + (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel, + LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), + " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel", + pattern> { + let LDS_1A2D = 1; +} + +def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; +def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >; +def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", + [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] +>; +def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE", + [(truncstorei8_local i32:$src1, i32:$src0)] +>; +def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", + [(truncstorei16_local i32:$src1, i32:$src0)] +>; +def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", + [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] +>; +def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", + [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] +>; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; - -def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", - [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] +def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", + [(set i32:$dst, (sextloadi8_local i32:$src0))] +>; +def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", + [(set i32:$dst, (az_extloadi8_local i32:$src0))] +>; +def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", + [(set i32:$dst, (sextloadi16_local i32:$src0))] +>; +def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", + [(set i32:$dst, (az_extloadi16_local i32:$src0))] >; // TRUNC is used for the FLT_TO_INT instructions to work around a @@ -1642,9 +1738,11 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", // SHA-256 Patterns def : SHA256MaPattern <BFI_INT_eg, XOR_INT>; + def : FROUNDPat <CNDGE_eg>; + def EG_ExportSwz : ExportSwzInst { let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{20} = 0; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; let Word1{30} = 0; // MARK @@ -1654,7 +1752,7 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", def EG_ExportBuf : ExportBufInst { let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{20} = 0; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; let Word1{30} = 0; // MARK @@ -1771,23 +1869,17 @@ def : Pat < def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - -class RAT_STORE_DWORD_cm <bits<4> mask, dag ins, list<dag> pat> : EG_CF_RAT < - 0x57, 0x14, mask, (outs), ins, - "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat -> { +class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : + CF_MEM_RAT_CACHELESS <0x14, 0, mask, + (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), + "STORE_DWORD $rw_gpr, $index_gpr", + [(global_store vt:$rw_gpr, i32:$index_gpr)]> { let eop = 0; // This bit is not used on Cayman. } -def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1, - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), - [(global_store i32:$rw_gpr, i32:$index_gpr)] ->; - -def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3, - (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr), - [(global_store v2i32:$rw_gpr, i32:$index_gpr)] ->; +def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>; +def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>; +def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>; class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern> : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> { @@ -2012,10 +2104,6 @@ def TXD_SHADOW: InstR600 < } // End isPseudo = 1 } // End usesCustomInserter = 1 -def CLAMP_R600 : CLAMP <R600_Reg32>; -def FABS_R600 : FABS<R600_Reg32>; -def FNEG_R600 : FNEG<R600_Reg32>; - //===---------------------------------------------------------------------===// // Return instruction //===---------------------------------------------------------------------===// @@ -2164,7 +2252,7 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; - defm BRANCH_COND : BranchConditional<IL_brcond>; + defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>; } //===---------------------------------------------------------------------===// @@ -2235,7 +2323,7 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>; //CNDGE_INT extra pattern def : Pat < - (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT), + (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), (CNDGE_INT $src0, $src1, $src2) >; @@ -2250,86 +2338,6 @@ def KIL : Pat < (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; -// SGT Reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT), - (SGT $src1, $src0) ->; - -// SGE Reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE $src1, $src0) ->; - -// SETGT_DX10 reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT), - (SETGT_DX10 $src1, $src0) ->; - -// SETGE_DX10 reverse args -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE), - (SETGE_DX10 $src1, $src0) ->; - -// SETGT_INT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETLT), - (SETGT_INT $src1, $src0) ->; - -// SETGE_INT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETLE), - (SETGE_INT $src1, $src0) ->; - -// SETGT_UINT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETULT), - (SETGT_UINT $src1, $src0) ->; - -// SETGE_UINT reverse args -def : Pat < - (selectcc i32:$src0, i32:$src1, -1, 0, SETULE), - (SETGE_UINT $src1, $src0) ->; - -// The next two patterns are special cases for handling 'true if ordered' and -// 'true if unordered' conditionals. The assumption here is that the behavior of -// SETE and SNE conforms to the Direct3D 10 rules for floating point values -// described here: -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit -// We assume that SETE returns false when one of the operands is NAN and -// SNE returns true when on of the operands is NAN - -//SETE - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE $src0, $src1) ->; - -//SETE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETO), - (SETE_DX10 $src0, $src1) ->; - -//SNE - 'true if unordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE $src0, $src1) ->; - -//SETNE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETUO), - (SETNE_DX10 $src0, $src1) ->; - def : Extract_Element <f32, v4f32, 0, sub0>; def : Extract_Element <f32, v4f32, 1, sub1>; def : Extract_Element <f32, v4f32, 2, sub2>; @@ -2378,3 +2386,11 @@ def : BitConvert <v4i32, v4f32, R600_Reg128>; def : DwordAddrPat <i32, R600_Reg32>; } // End isR600toCayman Predicate + +def getLDSNoRetOp : InstrMapping { + let FilterClass = "R600_LDS_1A1D"; + let RowFields = ["BaseOp"]; + let ColFields = ["DisableEncoding"]; + let KeyCol = ["$dst"]; + let ValueCols = [[""""]]; +} |