From 1503aba4a036f5394c7983417bc1e64613b2fc77 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky <elena.demikhovsky@intel.com>
Date: Wed, 1 Aug 2012 12:06:00 +0000
Subject: Added FMA functionality to X86 target.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161110 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 747bc44..0f019ef 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5679,7 +5679,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
        DAG.getTarget().Options.UnsafeFPMath) &&
       DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegal(ISD::FMA, VT)) {
+      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
 
     // fold (fadd (fmul x, y), z) -> (fma x, y, z)
     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
@@ -5704,6 +5704,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -5724,11 +5725,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
       return GetNegatedExpression(N1, DAG, LegalOperations);
     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
-      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+      return DAG.getNode(ISD::FNEG, dl, VT, N1);
   }
   // fold (fsub A, (fneg B)) -> (fadd A, B)
   if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
-    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+    return DAG.getNode(ISD::FADD, dl, VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
 
   // If 'unsafe math' is enabled, fold
@@ -5756,23 +5757,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
        DAG.getTarget().Options.UnsafeFPMath) &&
       DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegal(ISD::FMA, VT)) {
+      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
 
     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
-      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+      return DAG.getNode(ISD::FMA, dl, VT,
                          N0.getOperand(0), N0.getOperand(1),
-                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+                         DAG.getNode(ISD::FNEG, dl, VT, N1));
     }
 
     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
     // Note: Commutes FSUB operands.
     if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
-      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
-                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+      return DAG.getNode(ISD::FMA, dl, VT,
+                         DAG.getNode(ISD::FNEG, dl, VT,
                          N1.getOperand(0)),
                          N1.getOperand(1), N0);
     }
+
+    // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+    if (N0.getOpcode() == ISD::FNEG && 
+        N0.getOperand(0).getOpcode() == ISD::FMUL &&
+        N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+      SDValue N00 = N0.getOperand(0).getOperand(0);
+      SDValue N01 = N0.getOperand(0).getOperand(1);
+      return DAG.getNode(ISD::FMA, dl, VT,
+                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+                         DAG.getNode(ISD::FNEG, dl, VT, N1));
+    }
   }
 
   return SDValue();
-- 
cgit v1.1


From 7c626d30974c632ab500171ff185a24bcf2603bf Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor@mac.com>
Date: Mon, 13 Aug 2012 23:32:49 +0000
Subject: Add a roundToIntegral method to APFloat, which can be parameterized
 over various rounding modes.  Use this to implement SelectionDAG constant
 folding of FFLOOR, FCEIL, and FTRUNC.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161807 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 ++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0f019ef..4e29879 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -228,6 +228,9 @@ namespace {
     SDValue visitFP_EXTEND(SDNode *N);
     SDValue visitFNEG(SDNode *N);
     SDValue visitFABS(SDNode *N);
+    SDValue visitFCEIL(SDNode *N);
+    SDValue visitFTRUNC(SDNode *N);
+    SDValue visitFFLOOR(SDNode *N);
     SDValue visitBRCOND(SDNode *N);
     SDValue visitBR_CC(SDNode *N);
     SDValue visitLOAD(SDNode *N);
@@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
   case ISD::FNEG:               return visitFNEG(N);
   case ISD::FABS:               return visitFABS(N);
+  case ISD::FFLOOR:             return visitFFLOOR(N);
+  case ISD::FCEIL:              return visitFCEIL(N);
+  case ISD::FTRUNC:             return visitFTRUNC(N);
   case ISD::BRCOND:             return visitBRCOND(N);
   case ISD::BR_CC:              return visitBR_CC(N);
   case ISD::LOAD:               return visitLOAD(N);
@@ -6243,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fceil c1) -> fceil(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (ftrunc c1) -> ftrunc(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (ffloor c1) -> ffloor(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitFABS(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-- 
cgit v1.1


From 08da177c355dcde40dbe6c0a21b26956fcbca32c Mon Sep 17 00:00:00 2001
From: Stepan Dyatkovskiy <stpworld@narod.ru>
Date: Mon, 20 Aug 2012 07:57:06 +0000
Subject: Fixed DAGCombiner bug (found and localized by James Malloy): The
 DAGCombiner tries to optimise a BUILD_VECTOR by checking if it consists
 purely of get_vector_elts from one or two source vectors. If so, it either
 makes a concat_vectors node or a shufflevector node.

However, it doesn't check the element type width of the underlying
vector, so if you have this sequence:

Node0: v4i16 = ...
Node1: i32 = extract_vector_elt Node0
Node2: i32 = extract_vector_elt Node0
Node3: v16i8 = BUILD_VECTOR Node1, Node2, ...

It will attempt to:

Node0:    v4i16 = ...
NewNode1: v16i8 = concat_vectors Node0, ...

Where this is actually invalid because the element width is completely
different. This causes an assertion failure on DAG legalization stage.

Fix:
If output item type of BUILD_VECTOR differs from input item type.
Make concat_vectors based on input element type and then bitcast it to the output vector type. So the case described above will transformed to:
Node0:    v4i16 = ...
NewNode1: v8i16 = concat_vectors Node0, ...
NewNode2: v16i8 = bitcast NewNode1



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162195 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e29879..1c485a0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7876,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
       if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
         return SDValue();
 
-      // Widen the input vector by adding undef values.
-      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
-                           VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+      // If the element type of the input vector is not the same as
+      // the output element type, make concat_vectors based on input element
+      // type and then bitcast it to the output vector type.
+      //
+      // In another words avoid nodes like this:
+      //  <NODE> v16i8 = concat_vectors v4i16 v4i16
+      // Replace it with this one:
+      //  <NODE0> v8i16 = concat_vectors v4i16 v4i16
+      //  <NODE1> v16i8 = bitcast NODE0
+      EVT ItemType = VecIn1.getValueType().getVectorElementType();
+      if (ItemType != VT.getVectorElementType()) {
+        EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+                                ItemType,
+                                VecIn1.getValueType().getVectorNumElements()*2);
+        // Widen the input vector by adding undef values.
+        VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+                             VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+        VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+      } else
+        // Widen the input vector by adding undef values.
+        VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+                             VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
     }
 
     // If VecIn2 is unused then change it to undef.
-- 
cgit v1.1