diff options
Diffstat (limited to 'lib/Target/R600/R600Instructions.td')
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 73fa345..b6c00f8 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -216,7 +216,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, def TEX_SHADOW : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); + return (TType >= 6 && TType <= 8) || TType == 13; }] >; @@ -475,13 +475,13 @@ class ExportBufWord1 { multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0), 0, 61, 0, 7, 7, 7, cf_inst, 0) >; def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0), 0, 61, 7, 0, 7, 7, cf_inst, 0) >; @@ -513,17 +513,17 @@ multiclass SteamOutputExportPattern<Instruction ExportInst, // Stream1 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, 0, imm:$arraybase, + (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, 0, imm:$arraybase, + (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), - (ExportInst R600_Reg128:$src, 0, imm:$arraybase, + (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; } @@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; // Non-IEEE MUL: 0 * anything = 0 def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; -def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; -def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; +// TODO: Do these actually match the regular fmin/fmax behavior? +def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; +def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string. @@ -915,6 +916,11 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; +class FMA_Common <bits<5> inst> : R600_3OP < + inst, "FMA", + [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU +>; + class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] @@ -1068,7 +1074,7 @@ class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < } class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))] > { let Itinerary = TransALU; } @@ -1114,6 +1120,7 @@ def FNEG_R600 : FNEG<R600_Reg32>; // Helper patterns for complex intrinsics //===----------------------------------------------------------------------===// +// FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common <InstR600 recip_ieee> { def : Pat< (int_AMDGPU_div f32:$src0, f32:$src1), @@ -1124,6 +1131,8 @@ def : Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; + +def : RcpPat<recip_ieee, f32>; } class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> @@ -1133,9 +1142,12 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie >; // FROUND pattern -class FROUNDPat<Instruction CNDGE> : Pat < +class FROUNDPat<Instruction CNDGE, Instruction CNDGT> : Pat < (AMDGPUround f32:$x), - (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)) + (CNDGE $x, + (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)), + (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)) + ) >; @@ -1180,7 +1192,9 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; - def : FROUNDPat <CNDGE_r600>; + defm : RsqPat<RECIPSQRT_IEEE_r600, f32>; + + def : FROUNDPat <CNDGE_r600, CNDGT_r600>; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT @@ -1482,6 +1496,7 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 1; } multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> { |