aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/InstCombine/InstCombineAddSub.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAddSub.cpp')
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp238
1 files changed, 173 insertions, 65 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3c5781c..a2c545f 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
/// Class representing coefficient of floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of I write this comment, the cost of the default
- /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
/// perform write-merging).
- ///
+ ///
class FAddendCoef {
public:
// The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
//
FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
~FAddendCoef();
-
+
void set(short C) {
assert(!insaneIntVal(C) && "Insane coefficient");
IsFp = false; IntVal = C;
}
-
+
void set(const APFloat& C);
-
+
void negate();
-
+
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
Value *getValue(Type *) const;
-
+
// If possible, don't define operator+/operator- etc because these
// operators inevitably call FAddendCoef's constructor which is not cheap.
void operator=(const FAddendCoef &A);
void operator+=(const FAddendCoef &A);
void operator-=(const FAddendCoef &A);
void operator*=(const FAddendCoef &S);
-
+
bool isOne() const { return isInt() && IntVal == 1; }
bool isTwo() const { return isInt() && IntVal == 2; }
bool isMinusOne() const { return isInt() && IntVal == -1; }
bool isMinusTwo() const { return isInt() && IntVal == -2; }
-
+
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
@@ -74,18 +74,28 @@ namespace {
return *getFpValPtr();
}
- APFloat &getFpVal(void)
- { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-
+ APFloat &getFpVal(void) {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
bool isInt() const { return !IsFp; }
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from an *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
-
+
// True iff FpValBuf contains an instance of APFloat.
bool BufHasFpVal;
-
+
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from neighboring at most
// two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -94,7 +104,7 @@ namespace {
AlignedCharArrayUnion<APFloat> FpValBuf;
};
-
+
/// FAddend is used to represent floating-point addend. An addend is
/// represented as <C, V>, where the V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
@@ -102,10 +112,10 @@ namespace {
class FAddend {
public:
FAddend() { Val = 0; }
-
+
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
-
+
bool isConstant() const { return Val == 0; }
bool isZero() const { return Coeff.isZero(); }
@@ -114,17 +124,17 @@ namespace {
{ Coeff.set(Coefficient); Val = V; }
void set(const ConstantFP* Coefficient, Value *V)
{ Coeff.set(Coefficient->getValueAPF()); Val = V; }
-
+
void negate() { Coeff.negate(); }
-
+
/// Drill down the U-D chain one step to find the definition of V, and
/// try to break the definition into one or two addends.
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-
+
/// Similar to FAddend::drillDownOneStep() except that the value being
/// splitted is the addend itself.
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-
+
void operator+=(const FAddend &T) {
assert((Val == T.Val) && "Symbolic-values disagree");
Coeff += T.Coeff;
@@ -132,12 +142,12 @@ namespace {
private:
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-
+
// This addend has the value of "Coeff * Val".
Value *Val;
FAddendCoef Coeff;
};
-
+
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with its neighboring at most two instructions.
///
@@ -145,17 +155,17 @@ namespace {
public:
FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
Value *simplify(Instruction *FAdd);
-
+
private:
typedef SmallVector<const FAddend*, 4> AddendVect;
-
+
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
Value *performFactorization(Instruction *I);
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-
+
/// Return the number of instructions needed to emit the N-ary addition.
unsigned calcInstrNumber(const AddendVect& Vect);
Value *createFSub(Value *Opnd0, Value *Opnd1);
@@ -165,10 +175,10 @@ namespace {
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
-
+
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
-
+
private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
@@ -180,7 +190,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+}
//===----------------------------------------------------------------------===//
//
@@ -203,10 +213,34 @@ void FAddendCoef::set(const APFloat& C) {
} else
*P = C;
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
}
-void FAddendCoef::operator=(const FAddendCoef& That) {
+void FAddendCoef::operator=(const FAddendCoef &That) {
if (That.isInt())
set(That.IntVal);
else
@@ -222,16 +256,16 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
getFpVal().add(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
return;
}
-
+
APFloat &T = getFpVal();
- T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
void FAddendCoef::operator-=(const FAddendCoef &That) {
@@ -243,16 +277,16 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
getFpVal().subtract(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
return;
}
APFloat &T = getFpVal();
- T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
}
void FAddendCoef::operator*=(const FAddendCoef &That) {
@@ -271,15 +305,16 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
return;
}
- const fltSemantics &Semantic =
+ const fltSemantics &Semantic =
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
- set(APFloat(Semantic, IntVal));
+ convertToFpType(Semantic);
APFloat &F0 = getFpVal();
if (That.isInt())
- F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
else
F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
@@ -305,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
// A - B <1, A>, <1,B>
// 0 - B <-1, B>
// C * A, <C, A>
-// A + C <1, A> <C, NULL>
+// A + C <1, A> <C, NULL>
// 0 +/- 0 <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
-//
+//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
Instruction *I = 0;
@@ -380,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
return 0;
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
- if (!BreakNum || Coeff.isOne())
+ if (!BreakNum || Coeff.isOne())
return BreakNum;
Addend0.Scale(Coeff);
@@ -402,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep
Value *FAddCombine::performFactorization(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-
+
Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-
+
if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
return 0;
@@ -420,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) {
Value *Opnd1_0 = I1->getOperand(0);
Value *Opnd1_1 = I1->getOperand(1);
- // Input Instr I Factor AddSub0 AddSub1
+ // Input Instr I Factor AddSub0 AddSub1
// ----------------------------------------------
// (x*y) +/- (x*z) x y z
// (y/x) +/- (z/x) x y z
//
Value *Factor = 0;
Value *AddSub0 = 0, *AddSub1 = 0;
-
+
if (isMpy) {
if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
Factor = Opnd0_0;
@@ -459,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (isMpy)
return createFMul(Factor, NewAddSub);
-
+
return createFDiv(NewAddSub, Factor);
}
@@ -473,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
- // Save the instruction before calling other member-functions.
+ // Save the instruction before calling other member-functions.
Instr = I;
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -484,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
unsigned Opnd0_ExpNum = 0;
unsigned Opnd1_ExpNum = 0;
- if (!Opnd0.isConstant())
+ if (!Opnd0.isConstant())
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -506,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -546,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- // step 6: Try factorization as the last resort,
+ // step 6: Try factorization as the last resort,
return performFactorization(I);
}
@@ -555,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
- // For saving intermediate results;
+ // For saving intermediate results;
unsigned NextTmpIdx = 0;
FAddend TmpResult[3];
@@ -571,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
AddendVect SimpVect;
// The outer loop works on one symbolic-value at a time. Suppose the input
- // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
// The symbolic-values will be processed in this order: x, y, z.
//
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -598,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = 0;
SimpVect.push_back(T);
}
}
@@ -611,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
R += *SimpVect[Idx];
// Pop all addends being folded and push the resulting folded addend.
- SimpVect.resize(StartIdx);
+ SimpVect.resize(StartIdx);
if (Val != 0) {
if (!R.isZero()) {
SimpVect.push_back(&R);
@@ -624,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -646,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
assert(!Opnds.empty() && "Expect at least one addend");
// Step 1: Check if the # of instructions needed exceeds the quota.
- //
+ //
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
return 0;
@@ -667,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
// Iterate the addends, creating fadd/fsub using adjacent two addends.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
I != E; I++) {
- bool NeedNeg;
+ bool NeedNeg;
Value *V = createAddendVal(**I, NeedNeg);
if (!LastVal) {
LastVal = V;
@@ -693,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
}
#ifndef NDEBUG
- assert(CreateInstrNum == InstrNeeded &&
+ assert(CreateInstrNum == InstrNeeded &&
"Inconsistent in instruction numbers");
#endif
@@ -751,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
// Adjust the number of instructions needed to emit the N-ary add.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -939,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
@@ -1197,6 +1237,74 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
+ // A * (1 - uitofp i1 C) + B * (uitofp i1 C) -> select C, B, A
+ {
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *M1L, *M1R, *M2L, *M2R;
+ if (match(LHS, m_FMul(m_Value(M1L), m_Value(M1R))) &&
+ match(RHS, m_FMul(m_Value(M2L), m_Value(M2R)))) {
+
+ Value *A, *B, *C1, *C2;
+ if (!match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))))
+ std::swap(M1L, M1R);
+ if (!match(M2R, m_UIToFp(m_Value(C2))))
+ std::swap(M2L, M2R);
+
+ if (match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))) &&
+ match(M2R, m_UIToFp(m_Value(C2))) &&
+ C2->getType()->isIntegerTy(1) &&
+ C1 == C2) {
+ A = M1L;
+ B = M2L;
+ return SelectInst::Create(C1, B, A);
+ }
+
+ std::swap(M1L, M2L);
+ std::swap(M1R, M2R);
+
+ if (!match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))))
+ std::swap(M1L, M1R);
+ if (!match(M2R, m_UIToFp(m_Value(C2))))
+ std::swap(M2L, M2R);
+
+ if (match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))) &&
+ match(M2R, m_UIToFp(m_Value(C2))) &&
+ C2->getType()->isIntegerTy(1) &&
+ C1 == C2) {
+ A = M1L;
+ B = M2L;
+ return SelectInst::Create(C1, B, A);
+ }
+ }
+ }
+ }
+
+
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);