1 files changed, 70 insertions, 142 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index b5c2a93..a73691d 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -50,8 +50,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::UREM, MVT::v4i32, Expand);
   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
 
-  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
-  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
 
   setOperationAction(ISD::FSUB, MVT::f32, Expand);
 
@@ -65,8 +65,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 
-  setOperationAction(ISD::SETCC, MVT::i32, Custom);
-  setOperationAction(ISD::SETCC, MVT::f32, Custom);
+  setOperationAction(ISD::SETCC, MVT::i32, Expand);
+  setOperationAction(ISD::SETCC, MVT::f32, Expand);
   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
 
   setOperationAction(ISD::SELECT, MVT::i32, Custom);
@@ -94,6 +94,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::SELECT_CC);
 
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);
   setSchedulingPreference(Sched::VLIW);
 }
 
@@ -105,7 +106,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
 
   switch (MI->getOpcode()) {
   default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-  case AMDGPU::SHADER_TYPE: break;
   case AMDGPU::CLAMP_R600: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
@@ -150,7 +150,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                      MI->getOperand(1).getImm());
     break;
-
+  case AMDGPU::CONST_COPY: {
+    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+    TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+        MI->getOperand(1).getImm());
+    break;
+  }
 
   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
@@ -215,8 +221,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
 
   case AMDGPU::BRANCH:
       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
-              .addOperand(MI->getOperand(0))
-              .addReg(0);
+              .addOperand(MI->getOperand(0));
       break;
 
   case AMDGPU::BRANCH_COND_f32: {
@@ -227,7 +232,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
               .addImm(OPCODE_IS_NOT_ZERO)
               .addImm(0); // Flags
     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
             .addOperand(MI->getOperand(0))
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
     break;
@@ -241,7 +246,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
             .addImm(OPCODE_IS_NOT_ZERO_INT)
             .addImm(0); // Flags
     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
            .addOperand(MI->getOperand(0))
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
     break;
@@ -306,11 +311,9 @@ using namespace llvm::AMDGPUIntrinsic;
 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
   case ISD::ROTL: return LowerROTL(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
-  case ISD::SETCC: return LowerSETCC(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
   case ISD::LOAD: return LowerLOAD(Op, DAG);
   case ISD::FPOW: return LowerFPOW(Op, DAG);
@@ -470,44 +473,6 @@ SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
       );
 }
 
-SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Chain = Op.getOperand(0);
-  SDValue CC = Op.getOperand(1);
-  SDValue LHS   = Op.getOperand(2);
-  SDValue RHS   = Op.getOperand(3);
-  SDValue JumpT  = Op.getOperand(4);
-  SDValue CmpValue;
-  SDValue Result;
-
-  if (LHS.getValueType() == MVT::i32) {
-    CmpValue = DAG.getNode(
-        ISD::SELECT_CC,
-        Op.getDebugLoc(),
-        MVT::i32,
-        LHS, RHS,
-        DAG.getConstant(-1, MVT::i32),
-        DAG.getConstant(0, MVT::i32),
-        CC);
-  } else if (LHS.getValueType() == MVT::f32) {
-    CmpValue = DAG.getNode(
-        ISD::SELECT_CC,
-        Op.getDebugLoc(),
-        MVT::f32,
-        LHS, RHS,
-        DAG.getConstantFP(1.0f, MVT::f32),
-        DAG.getConstantFP(0.0f, MVT::f32),
-        CC);
-  } else {
-    assert(0 && "Not valid type for br_cc");
-  }
-  Result = DAG.getNode(
-      AMDGPUISD::BRANCH_COND,
-      CmpValue.getDebugLoc(),
-      MVT::Other, Chain,
-      JumpT, CmpValue);
-  return Result;
-}
-
 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                    DebugLoc DL,
                                                    unsigned DwordOffset) const {
@@ -576,12 +541,37 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
 
   // Check if we can lower this to a native operation.
 
+  // Try to lower to a SET* instruction:
+  //
+  // SET* can match the following patterns:
+  //
+  // select_cc f32, f32, -1,  0, cc_any
+  // select_cc f32, f32, 1.0f, 0.0f, cc_any
+  // select_cc i32, i32, -1,  0, cc_any
+  //
+
+  // Move hardware True/False values to the correct operand.
+  if (isHWTrueValue(False) && isHWFalseValue(True)) {
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+    std::swap(False, True);
+    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+  }
+
+  if (isHWTrueValue(True) && isHWFalseValue(False) &&
+      (CompareVT == VT || VT == MVT::i32)) {
+    // This can be matched by a SET* instruction.
+    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+  }
+
   // Try to lower to a CND* instruction:
-  // CND* instructions requires RHS to be zero.  Some SELECT_CC nodes that
-  // can be lowered to CND* instructions can also be lowered to SET*
-  // instructions.  CND* instructions are cheaper, because they dont't
-  // require additional instructions to convert their result to the correct
-  // value type, so this check should be first.
+  //
+  // CND* can match the following patterns:
+  //
+  // select_cc f32, 0.0, f32, f32, cc_any
+  // select_cc f32, 0.0, i32, i32, cc_any
+  // select_cc i32, 0,   f32, f32, cc_any
+  // select_cc i32, 0,   i32, i32, cc_any
+  //
   if (isZero(LHS) || isZero(RHS)) {
     SDValue Cond = (isZero(LHS) ? RHS : LHS);
     SDValue Zero = (isZero(LHS) ? LHS : RHS);
@@ -623,38 +613,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
   }
 
-  // Try to lower to a SET* instruction:
-  //
-  // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
-  // but for the other case where CompareVT != VT, all operands of
-  // SELECT_CC need to have the same value type, so we need to change True and
-  // False to be the same type as LHS and RHS, and then convert the result of
-  // the select_cc back to the correct type.
-
-  // Move hardware True/False values to the correct operand.
-  if (isHWTrueValue(False) && isHWFalseValue(True)) {
-    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-    std::swap(False, True);
-    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
-  }
-
-  if (isHWTrueValue(True) && isHWFalseValue(False)) {
-    if (CompareVT !=  VT && VT == MVT::f32 && CompareVT == MVT::i32) {
-      SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
-          LHS, RHS,
-          DAG.getConstant(-1, MVT::i32),
-          DAG.getConstant(0, MVT::i32),
-          CC);
-      // Convert integer values of true (-1) and false (0) to fp values of
-      // true (1.0f) and false (0.0f).
-      SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
-                                                DAG.getConstant(1, MVT::i32));
-      return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
-    } else {
-      // This SELECT_CC is already legal.
-      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
-    }
-  }
 
   // Possible Min/Max pattern
   SDValue MinMax = LowerMinMax(Op, DAG);
@@ -698,48 +656,6 @@ SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       DAG.getCondCode(ISD::SETNE));
 }
 
-SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Cond;
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue CC  = Op.getOperand(2);
-  DebugLoc DL = Op.getDebugLoc();
-  assert(Op.getValueType() == MVT::i32);
-  if (LHS.getValueType() == MVT::i32) {
-    Cond = DAG.getNode(
-        ISD::SELECT_CC,
-        Op.getDebugLoc(),
-        MVT::i32,
-        LHS, RHS,
-        DAG.getConstant(-1, MVT::i32),
-        DAG.getConstant(0, MVT::i32),
-        CC);
-  } else if (LHS.getValueType() == MVT::f32) {
-    Cond = DAG.getNode(
-        ISD::SELECT_CC,
-        Op.getDebugLoc(),
-        MVT::f32,
-        LHS, RHS,
-        DAG.getConstantFP(1.0f, MVT::f32),
-        DAG.getConstantFP(0.0f, MVT::f32),
-        CC);
-    Cond = DAG.getNode(
-        ISD::FP_TO_SINT,
-        DL,
-        MVT::i32,
-        Cond);
-  } else {
-    assert(0 && "Not valid type for set_cc");
-  }
-  Cond = DAG.getNode(
-      ISD::AND,
-      DL,
-      MVT::i32,
-      DAG.getConstant(1, MVT::i32),
-      Cond);
-  return Cond;
-}
-
 /// LLVM generates byte-addresed pointers.  For indirect addressing, we need to
 /// convert these pointers to a register index.  Each register holds
 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
@@ -918,7 +834,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   if (ConstantBlock > -1) {
     SDValue Result;
     if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
-        dyn_cast<Constant>(LoadNode->getSrcValue())) {
+        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+        dyn_cast<ConstantSDNode>(Ptr)) {
       SDValue Slots[4];
       for (unsigned i = 0; i < 4; i++) {
         // We want Const position encoded with the following formula :
@@ -934,7 +851,9 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
     } else {
       // non constant ptr cant be folded, keeps it as a v4f32 load
       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
-          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+          DAG.getConstant(LoadNode->getAddressSpace() -
+	                  AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
           );
     }
 
@@ -1122,6 +1041,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SELECT_CC: {
     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
     //      selectcc x, y, a, b, inv(cc)
+    //
+    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+    //      selectcc x, y, a, b, cc
     SDValue LHS = N->getOperand(0);
     if (LHS.getOpcode() != ISD::SELECT_CC) {
       return SDValue();
@@ -1130,24 +1052,30 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
     SDValue RHS = N->getOperand(1);
     SDValue True = N->getOperand(2);
     SDValue False = N->getOperand(3);
+    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
 
     if (LHS.getOperand(2).getNode() != True.getNode() ||
         LHS.getOperand(3).getNode() != False.getNode() ||
-        RHS.getNode() != False.getNode() ||
-        cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+        RHS.getNode() != False.getNode()) {
       return SDValue();
     }
 
-    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
-    CCOpcode = ISD::getSetCCInverse(
-                        CCOpcode, LHS.getOperand(0).getValueType().isInteger());
-    return DAG.getSelectCC(N->getDebugLoc(),
-                           LHS.getOperand(0),
-                           LHS.getOperand(1),
-                           LHS.getOperand(2),
-                           LHS.getOperand(3),
-                           CCOpcode);
+    switch (NCC) {
+    default: return SDValue();
+    case ISD::SETNE: return LHS;
+    case ISD::SETEQ: {
+      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+      LHSCC = ISD::getSetCCInverse(LHSCC,
+                                  LHS.getOperand(0).getValueType().isInteger());
+      return DAG.getSelectCC(N->getDebugLoc(),
+                             LHS.getOperand(0),
+                             LHS.getOperand(1),
+                             LHS.getOperand(2),
+                             LHS.getOperand(3),
+                             LHSCC);
     }
+    }
+  }
   case AMDGPUISD::EXPORT: {
     SDValue Arg = N->getOperand(1);
     if (Arg.getOpcode() != ISD::BUILD_VECTOR)