When both x/y and x%y are needed (x and y both scalar integer), compute both results with a single div or idiv instruction.

This uses new X86ISD nodes for DIV and IDIV, which are introduced during the legalize phase so that the SelectionDAG's CSE can automatically eliminate redundant computations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42308 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dan Gohman <gohman@apple.com> 2007-09-25 18:23:27 +0000
committer: Dan Gohman <gohman@apple.com> 2007-09-25 18:23:27 +0000
commit: a37c9f7506af622b9f29a35466b33c650c75e9f7 (patch)
tree: 4749c09d72e3e4f7fc9c436ac321fd9cca2f5a1b /lib
parent: 798b4afd48ebc0acc165789ab913ccd28466ef68 (diff)
download: external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.zip
external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.tar.gz
external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.tar.bz2
3 files changed, 88 insertions, 25 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b33ced8..117d273 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1162,12 +1162,9 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
       return NULL;
     }
       
-    case ISD::SDIV:
-    case ISD::UDIV:
-    case ISD::SREM:
-    case ISD::UREM: {
-      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
-      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+    case X86ISD::DIV:
+    case X86ISD::IDIV: {
+      bool isSigned = Opcode == X86ISD::IDIV;
       if (!isSigned)
         switch (NVT) {
         default: assert(0 && "Unsupported VT!");
@@ -1275,31 +1272,49 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
           SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
       }
 
-      unsigned Reg = isDiv ? LoReg : HiReg;
-      SDOperand Result;
-      if (Reg == X86::AH && Subtarget->is64Bit()) {
-        // Prevent use of AH in a REX instruction by referencing AX instead.
-        // Shift it down 8 bits.
-        Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
-        Chain = Result.getValue(1);
-        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
-                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
-        // Then truncate it down to i8.
-        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
-        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
-                                                 MVT::i8, Result, SRIdx), 0);
-      } else {
-        Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag);
+      // Copy the division (low) result, if it is needed.
+      if (!N.getValue(0).use_empty()) {
+        SDOperand Result = CurDAG->getCopyFromReg(Chain, LoReg, NVT, InFlag);
         Chain = Result.getValue(1);
+        InFlag = Result.getValue(2);
+        ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+        DOUT << std::string(Indent-2, ' ') << "=> ";
+        DEBUG(Result.Val->dump(CurDAG));
+        DOUT << "\n";
+#endif
+      }
+      // Copy the remainder (high) result, if it is needed.
+      if (!N.getValue(1).use_empty()) {
+        SDOperand Result;
+        if (HiReg == X86::AH && Subtarget->is64Bit()) {
+          // Prevent use of AH in a REX instruction by referencing AX instead.
+          // Shift it down 8 bits.
+          Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
+          Chain = Result.getValue(1);
+          InFlag = Result.getValue(2);
+          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
+                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
+          // Then truncate it down to i8.
+          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+                                                   MVT::i8, Result, SRIdx), 0);
+        } else {
+          Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
+          Chain = Result.getValue(1);
+          InFlag = Result.getValue(2);
+        }
+        ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+        DOUT << std::string(Indent-2, ' ') << "=> ";
+        DEBUG(Result.Val->dump(CurDAG));
+        DOUT << "\n";
+#endif
       }
-      ReplaceUses(N.getValue(0), Result);
       if (foldedLoad)
         ReplaceUses(N1.getValue(1), Chain);
 
 #ifndef NDEBUG
-      DOUT << std::string(Indent-2, ' ') << "=> ";
-      DEBUG(Result.Val->dump(CurDAG));
-      DOUT << "\n";
       Indent -= 2;
 #endif
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a67e77f..1a80896 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -155,6 +155,27 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
   }
 
+  // Divide and remainder are lowered to use div or idiv in legalize in
+  // order to expose the intermediate computations to trivial CSE. This is
+  // most noticeable when both x/y and x%y are being computed; they can be
+  // done with a single div or idiv.
+  setOperationAction(ISD::SDIV            , MVT::i8    , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i8    , Custom);
+  setOperationAction(ISD::SREM            , MVT::i8    , Custom);
+  setOperationAction(ISD::UREM            , MVT::i8    , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i16   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i16   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i16   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i16   , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i32   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i32   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i32   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i32   , Custom);
+  setOperationAction(ISD::SDIV            , MVT::i64   , Custom);
+  setOperationAction(ISD::UDIV            , MVT::i64   , Custom);
+  setOperationAction(ISD::SREM            , MVT::i64   , Custom);
+  setOperationAction(ISD::UREM            , MVT::i64   , Custom);
+
   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
   setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
@@ -3393,6 +3414,22 @@ SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
     return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
 }
 
+SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) {
+  unsigned Opcode = Op.getOpcode();
+  MVT::ValueType NVT = Op.getValueType();
+  bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
+  bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+  unsigned Opc = isSigned ? X86ISD::IDIV : X86ISD::DIV;
+
+  SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+  SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2);
+
+  if (isDiv)
+    return DR;
+
+  return SDOperand(DR.Val, 1);
+}
+
 SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
   assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
          Op.getOperand(0).getValueType() >= MVT::i16 &&
@@ -4668,6 +4705,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::SHL_PARTS:
   case ISD::SRA_PARTS:
   case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:               return LowerIntegerDivOrRem(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::FABS:               return LowerFABS(Op, DAG);
@@ -4751,6 +4792,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
   case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
   case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
+  case X86ISD::DIV:                return "X86ISD::DIV";
+  case X86ISD::IDIV:               return "X86ISD::IDIV";
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 04279e8..10172d9 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -181,6 +181,10 @@ namespace llvm {
       /// in order to obtain suitable precision.
       FRSQRT, FRCP,
 
+      /// DIV, IDIV - Unsigned and signed integer division and remainder.
+      ///
+      DIV, IDIV,
+
       // Thread Local Storage
       TLSADDR, THREAD_POINTER,
 
@@ -420,6 +424,7 @@ namespace llvm {
     SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG);
+    SDOperand LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
author	Dan Gohman <gohman@apple.com>	2007-09-25 18:23:27 +0000
committer	Dan Gohman <gohman@apple.com>	2007-09-25 18:23:27 +0000
commit	a37c9f7506af622b9f29a35466b33c650c75e9f7 (patch)
tree	4749c09d72e3e4f7fc9c436ac321fd9cca2f5a1b /lib
parent	798b4afd48ebc0acc165789ab913ccd28466ef68 (diff)
download	external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.zip external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.tar.gz external_llvm-a37c9f7506af622b9f29a35466b33c650c75e9f7.tar.bz2