| author | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
| commit | 1878f9a7874b1ff569d745c0269f49d3daf7203d (patch) | |
| tree | 19a8dbaaedf6a056c617e87596b32d3f452af137 /lib/Target/R600/AMDILISelDAGToDAG.cpp | |
| parent | 7a57f27b857ec4b243d83d392a399f02fc196c0a (diff) | |
| parent | 100fbdd06be7590b23c4707a98cd605bdb519498 (diff) | |
| download | external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.zip external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.gz external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.bz2 | |
Merge commit '100fbdd06be7590b23c4707a98cd605bdb519498' into merge_20130612
Diffstat (limited to 'lib/Target/R600/AMDILISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/R600/AMDILISelDAGToDAG.cpp | 241 |
1 file changed, 182 insertions, 59 deletions
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index ba75a44..93432a2 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -14,14 +14,14 @@
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPURegisterInfo.h"
-#include "AMDILDevices.h"
 #include "R600InstrInfo.h"
 #include "SIISelLowering.h"
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include <list>
 #include <queue>
@@ -48,7 +48,10 @@ public:
 
 private:
   inline SDValue getSmallIPtrImm(unsigned Imm);
+  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+                   const R600InstrInfo *TII, std::vector<unsigned> Cst);
   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
 
   // Complex pattern selectors
   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@@ -164,7 +167,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   default: break;
   case ISD::BUILD_VECTOR: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
@@ -194,7 +197,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::BUILD_PAIR: {
     SDValue RC, SubReg0, SubReg1;
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     if (N->getValueType(0) == MVT::i128) {
@@ -211,7 +214,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                             N->getOperand(1), SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
-                                  N->getDebugLoc(), N->getValueType(0), Ops);
+                                  SDLoc(N), N->getValueType(0), Ops);
   }
 
   case ISD::ConstantFP:
@@ -219,7 +222,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
     // XXX: Custom immediate lowering not implemented yet.  Instead we use
     // pseudo instructions defined in SIInstructions.td
-    if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
     const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
@@ -314,9 +317,23 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
   // Fold operands of selected node
   const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
-  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     const R600InstrInfo *TII =
         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
+      bool IsModified = false;
+      do {
+        std::vector<SDValue> Ops;
+        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+            I != E; ++I)
+          Ops.push_back(*I);
+        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
+        if (IsModified) {
+          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+        }
+      } while (IsModified);
+
+    }
     if (Result && Result->isMachineOpcode() &&
         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
         && TII->isALUInstr(Result->getMachineOpcode())) {
@@ -359,6 +376,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return Result;
 }
 
+bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
+                                     SDValue &Abs, const R600InstrInfo *TII,
+                                     std::vector<unsigned> Consts) {
+  switch (Src.getOpcode()) {
+  case AMDGPUISD::CONST_ADDRESS: {
+    SDValue CstOffset;
+    if (Src.getValueType().isVector() ||
+        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
+      return false;
+
+    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+    Consts.push_back(Cst->getZExtValue());
+    if (!TII->fitsConstReadLimitations(Consts))
+      return false;
+
+    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Sel = CstOffset;
+    return true;
+  }
+  case ISD::FNEG:
+    Src = Src.getOperand(0);
+    Neg = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::FABS:
+    if (!Abs.getNode())
+      return false;
+    Src = Src.getOperand(0);
+    Abs = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::BITCAST:
+    Src = Src.getOperand(0);
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
   int OperandIdx[] = {
@@ -382,59 +436,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     -1
   };
 
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 3; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
+      break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
+    }
+  }
+
   for (unsigned i = 0; i < 3; i++) {
     if (OperandIdx[i] < 0)
       return false;
-    SDValue Operand = Ops[OperandIdx[i] - 1];
-    switch (Operand.getOpcode()) {
-    case AMDGPUISD::CONST_ADDRESS: {
-      SDValue CstOffset;
-      if (Operand.getValueType().isVector() ||
-          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
-        break;
-
-      // Gather others constants values
-      std::vector<unsigned> Consts;
-      for (unsigned j = 0; j < 3; j++) {
-        int SrcIdx = OperandIdx[j];
-        if (SrcIdx < 0)
-          break;
-        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-          if (Reg->getReg() == AMDGPU::ALU_CONST) {
-            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-            Consts.push_back(Cst->getZExtValue());
-          }
-        }
-      }
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue FakeAbs;
+    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
+  return false;
+}
 
-      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-      Consts.push_back(Cst->getZExtValue());
-      if (!TII->fitsConstReadLimitations(Consts))
-        break;
+bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
+    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+  int OperandIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
+  };
+  int SelIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
+  };
+  int NegIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
+  };
+  int AbsIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
+  };
 
-      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-      Ops[SelIdx[i] - 1] = CstOffset;
-      return true;
-    }
-    case ISD::FNEG:
-      if (NegIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::FABS:
-      if (AbsIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::BITCAST:
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      return true;
-    default:
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 8; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
       break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
     }
   }
+
+  for (unsigned i = 0; i < 8; i++) {
+    if (OperandIdx[i] < 0)
+      return false;
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue &Abs = Ops[AbsIdx[i] - 1];
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
   return false;
 }
@@ -616,7 +712,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
              isInt<16>(IMMOffset->getZExtValue())) {
     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  CurDAG->getEntryNode().getDebugLoc(),
+                                  SDLoc(CurDAG->getEntryNode()),
                                   AMDGPU::ZERO, MVT::i32);
     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
     return true;
@@ -649,18 +745,45 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
 
+  if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    return;
+  }
+
   // Go over all selected nodes and try to fold them a bit more
-  const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
+  const AMDGPUTargetLowering& Lowering = (*(const AMDGPUTargetLowering*)TLI);
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ++I) {
 
-    MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
-    if (!Node)
+    SDNode *Node = I;
+    switch (Node->getOpcode()) {
+    // Fix the register class in copy to CopyToReg nodes - ISel will always
+    // use SReg classes for 64-bit copies, but this is not always what we want.
+    case ISD::CopyToReg: {
+      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      SDValue Val = Node->getOperand(2);
+      const TargetRegisterClass *RC = RegInfo->getRegClass(Reg);
+      if (RC != &AMDGPU::SReg_64RegClass) {
+        continue;
+      }
+
+      if (!Val.getNode()->isMachineOpcode()) {
+        continue;
+      }
+
+      const MCInstrDesc Desc = TM.getInstrInfo()->get(Val.getNode()->getMachineOpcode());
+      const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+      RegInfo->setRegClass(Reg, TRI->getRegClass(Desc.OpInfo[0].RegClass));
       continue;
+    }
+    }
 
-    SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
-    if (ResNode != Node)
+    MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+    if (!MachineNode)
+      continue;
+
+    SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+    if (ResNode != Node) {
       ReplaceUses(Node, ResNode);
+    }
   }
 }
-
