From 1503aba4a036f5394c7983417bc1e64613b2fc77 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Wed, 1 Aug 2012 12:06:00 +0000 Subject: Added FMA functionality to X86 target. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161110 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 747bc44..0f019ef 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5679,7 +5679,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { @@ -5704,6 +5704,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5724,11 +5725,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, 
LegalOperations)); // If 'unsafe math' is enabled, fold @@ -5756,23 +5757,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + DAG.getNode(ISD::FNEG, dl, VT, N1)); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); } + + // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } } return SDValue(); -- cgit v1.1 From 7c626d30974c632ab500171ff185a24bcf2603bf Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 13 Aug 2012 23:32:49 +0000 Subject: Add a roundToIntegral method to APFloat, which can be parameterized over various rounding modes. Use this to implement SelectionDAG constant folding of FFLOOR, FCEIL, and FTRUNC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161807 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0f019ef..4e29879 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -6243,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { 
+ SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); -- cgit v1.1 From 08da177c355dcde40dbe6c0a21b26956fcbca32c Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Mon, 20 Aug 2012 07:57:06 +0000 Subject: Fixed DAGCombiner bug (found and localized by James Malloy): The DAGCombiner tries to optimise a BUILD_VECTOR by checking if it consists purely of get_vector_elts from one or two source vectors. If so, it either makes a concat_vectors node or a shufflevector node. However, it doesn't check the element type width of the underlying vector, so if you have this sequence: Node0: v4i16 = ... Node1: i32 = extract_vector_elt Node0 Node2: i32 = extract_vector_elt Node0 Node3: v16i8 = BUILD_VECTOR Node1, Node2, ... It will attempt to: Node0: v4i16 = ... NewNode1: v16i8 = concat_vectors Node0, ... Where this is actually invalid because the element width is completely different. This causes an assertion failure at the DAG legalization stage. Fix: If the output item type of BUILD_VECTOR differs from the input item type, make concat_vectors based on the input element type and then bitcast it to the output vector type. So the case described above will be transformed to: Node0: v4i16 = ... NewNode1: v8i16 = concat_vectors Node0, ... 
NewNode2: v16i8 = bitcast NewNode1 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162195 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4e29879..1c485a0 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7876,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the element type of the input vector is not the same as + // the output element type, make concat_vectors based on input element + // type and then bitcast it to the output vector type. + // + // In another words avoid nodes like this: + // v16i8 = concat_vectors v4i16 v4i16 + // Replace it with this one: + // v8i16 = concat_vectors v4i16 v4i16 + // v16i8 = bitcast NODE0 + EVT ItemType = VecIn1.getValueType().getVectorElementType(); + if (ItemType != VT.getVectorElementType()) { + EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), + ItemType, + VecIn1.getValueType().getVectorNumElements()*2); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); + } else + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } // If VecIn2 is unused then change it to undef. -- cgit v1.1