R600: Add support for SET*_DX10 instructions

These instructions compare two floating point values and return an integer true (-1) or false (0) value. When compiling code generated by the Mesa GLSL frontend, the SET*_DX10 instructions save us four instructions for most branch decisions that use floating-point comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174609 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tom Stellard <thomas.stellard@amd.com> 2013-02-07 14:02:35 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2013-02-07 14:02:35 +0000
commit: 1234c9be42b4ebd4b398df461123205dccf3706c (patch)
tree: d328469268473b89e100f52577a0dbfe6f244eb5 /lib/Target/R600
parent: 2a77cf7f47900c8df55cfea794936006b494e943 (diff)
download: external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.zip
external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.tar.gz
external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.tar.bz2
2 files changed, 131 insertions, 29 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 85187f8..8f4ec94 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -90,7 +90,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
 
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::FP_TO_SINT);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  setTargetDAGCombine(ISD::SELECT_CC);
 
   setSchedulingPreference(Sched::VLIW);
 }
@@ -670,9 +672,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   }
 
   // Try to lower to a SET* instruction:
-  // We need all the operands of SELECT_CC to have the same value type, so if
-  // necessary we need to change True and False to be the same type as LHS and
-  // RHS, and then convert the result of the select_cc back to the correct type.
+  //
+  // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
+  // but for the other case where CompareVT != VT, all operands of
+  // SELECT_CC need to have the same value type, so we need to change True and
+  // False to be the same type as LHS and RHS, and then convert the result of
+  // the select_cc back to the correct type.
 
   // Move hardware True/False values to the correct operand.
   if (isHWTrueValue(False) && isHWFalseValue(True)) {
@@ -682,32 +687,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   }
 
   if (isHWTrueValue(True) && isHWFalseValue(False)) {
-    if (CompareVT !=  VT) {
-      if (VT == MVT::f32 && CompareVT == MVT::i32) {
-        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-            LHS, RHS,
-            DAG.getConstant(-1, MVT::i32),
-            DAG.getConstant(0, MVT::i32),
-            CC);
-        // Convert integer values of true (-1) and false (0) to fp values of
-        // true (1.0f) and false (0.0f).
-        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
-                                                  DAG.getConstant(1, MVT::i32));
-        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
-      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
-        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-            LHS, RHS,
-            DAG.getConstantFP(1.0f, MVT::f32),
-            DAG.getConstantFP(0.0f, MVT::f32),
-            CC);
-        // Convert fp values of true (1.0f) and false (0.0f) to integer values
-        // of true (-1) and false (0).
-        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
-        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
-      } else {
-        // I don't think there will be any other type pairings.
-        assert(!"Unhandled operand type parings in SELECT_CC");
-      }
+    if (CompareVT !=  VT && VT == MVT::f32 && CompareVT == MVT::i32) {
+      SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+          LHS, RHS,
+          DAG.getConstant(-1, MVT::i32),
+          DAG.getConstant(0, MVT::i32),
+          CC);
+      // Convert integer values of true (-1) and false (0) to fp values of
+      // true (1.0f) and false (0.0f).
+      SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
+                                                DAG.getConstant(1, MVT::i32));
+      return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
     } else {
       // This SELECT_CC is already legal.
       return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
@@ -1128,6 +1118,35 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
       }
       break;
     }
+
+  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+  // (i32 select_cc f32, f32, -1, 0 cc)
+  //
+  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+  // this to one of the SET*_DX10 instructions.
+  case ISD::FP_TO_SINT: {
+    SDValue FNeg = N->getOperand(0);
+    if (FNeg.getOpcode() != ISD::FNEG) {
+      return SDValue();
+    }
+    SDValue SelectCC = FNeg.getOperand(0);
+    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+        !isHWTrueValue(SelectCC.getOperand(2)) ||
+        !isHWFalseValue(SelectCC.getOperand(3))) {
+      return SDValue();
+    }
+
+    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+                           SelectCC.getOperand(0), // LHS
+                           SelectCC.getOperand(1), // RHS
+                           DAG.getConstant(-1, MVT::i32), // True
+                           DAG.getConstant(0, MVT::i32),  // False
+                           SelectCC.getOperand(4)); // CC
+
+    break;
+  }
   // Extract_vec (Build_vector) generated by custom lowering
   // also needs to be customly combined
   case ISD::EXTRACT_VECTOR_ELT: {
@@ -1147,6 +1166,37 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
       }
     }
   }
+
+  case ISD::SELECT_CC: {
+    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+    //      selectcc x, y, a, b, inv(cc)
+    SDValue LHS = N->getOperand(0);
+    if (LHS.getOpcode() != ISD::SELECT_CC) {
+      return SDValue();
+    }
+
+    SDValue RHS = N->getOperand(1);
+    SDValue True = N->getOperand(2);
+    SDValue False = N->getOperand(3);
+
+    if (LHS.getOperand(2).getNode() != True.getNode() ||
+        LHS.getOperand(3).getNode() != False.getNode() ||
+        RHS.getNode() != False.getNode() ||
+        cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+      return SDValue();
+    }
+
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
+    CCOpcode = ISD::getSetCCInverse(
+                        CCOpcode, LHS.getOperand(0).getValueType().isInteger());
+    return DAG.getSelectCC(N->getDebugLoc(),
+                           LHS.getOperand(0),
+                           LHS.getOperand(1),
+                           LHS.getOperand(2),
+                           LHS.getOperand(3),
+                           CCOpcode);
+
+  }
   }
   return SDValue();
 }
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index d40e75b..06196e7 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -711,6 +711,34 @@ def SNE : R600_2OP <
     COND_NE))]
 >;
 
+def SETE_DX10 : R600_2OP <
+  0xC, "SETE_DX10",
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+    COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+  0xD, "SETGT_DX10",
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+    COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+  0xE, "SETGE_DX10",
+  [(set R600_Reg32:$dst,
+   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+    COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+  0xF, "SETNE_DX10",
+  [(set R600_Reg32:$dst,
+    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+     COND_NE))]
+>;
+
 def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
 def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
 def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
@@ -1772,6 +1800,18 @@ def : Pat <
   (SGE R600_Reg32:$src1, R600_Reg32:$src0) 
 >;
 
+// SETGT_DX10 reverse args
+def : Pat <
+  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+  (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+  (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
 // SETGT_INT reverse args
 def : Pat <
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
@@ -1810,12 +1850,24 @@ def : Pat <
   (SETE R600_Reg32:$src0, R600_Reg32:$src1)
 >;
 
+//SETE_DX10 - 'true if ordered'
+def : Pat <
+  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+  (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
 //SNE - 'true if unordered'
 def : Pat <
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
   (SNE R600_Reg32:$src0, R600_Reg32:$src1)
 >;
 
+//SETNE_DX10 - 'true if unordered'
+def : Pat <
+  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+  (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
 def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
 def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
 def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
author	Tom Stellard <thomas.stellard@amd.com>	2013-02-07 14:02:35 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2013-02-07 14:02:35 +0000
commit	1234c9be42b4ebd4b398df461123205dccf3706c (patch)
tree	d328469268473b89e100f52577a0dbfe6f244eb5 /lib/Target/R600
parent	2a77cf7f47900c8df55cfea794936006b494e943 (diff)
download	external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.zip external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.tar.gz external_llvm-1234c9be42b4ebd4b398df461123205dccf3706c.tar.bz2