diff options
-rw-r--r-- | include/llvm/ADT/APFloat.h | 1 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 | ||||
-rw-r--r-- | lib/Support/APFloat.cpp | 26 | ||||
-rw-r--r-- | test/CodeGen/ARM/floorf.ll | 29 |
5 files changed, 116 insertions, 0 deletions
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 2b466f9..5a625a4 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -274,6 +274,7 @@ namespace llvm { /* C fmod, or llvm frem. */ opStatus mod(const APFloat &, roundingMode); opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); + opStatus roundToIntegral(roundingMode); /* Sign operations. */ void changeSign(); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0f019ef..4e29879 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -6243,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d96f275..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2483,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 9f5ca3e..2139df5 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1765,6 +1765,32 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, return fs; } +/* Rounding-mode corrrect round to integral value. */ +APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { + opStatus fs; + assertArithmeticOK(*semantics); + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), + 1 << (semanticsPrecision(*semantics)-1)); + APFloat MagicConstant(*semantics); + fs = MagicConstant.convertFromAPInt(IntegerConstant, false, + rmNearestTiesToEven); + if (fs != opOK) + return fs; + + fs = add(MagicConstant, rounding_mode); + if (fs != opOK && fs != opInexact) + return fs; + + fs = subtract(MagicConstant, rounding_mode); + return fs; +} + + /* Comparison requires normalized numbers. */ APFloat::cmpResult APFloat::compare(const APFloat &rhs) const diff --git a/test/CodeGen/ARM/floorf.ll b/test/CodeGen/ARM/floorf.ll new file mode 100644 index 0000000..29f43fd --- /dev/null +++ b/test/CodeGen/ARM/floorf.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=arm < %s | FileCheck %s + +; CHECK: test1 +define float @test1() nounwind uwtable readnone ssp { +; CHECK-NOT: floorf + %foo = call float @floorf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +; CHECK: test2 +define float @test2() nounwind uwtable readnone ssp { +; CHECK-NOT: ceilf + %foo = call float @ceilf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +; CHECK: test3 +define float @test3() nounwind uwtable readnone ssp { +; CHECK-NOT: truncf + %foo = call float @truncf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +declare float @floorf(float) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare float @truncf(float) nounwind readnone + + + |