author     Stephen Hines <srhines@google.com>    2014-07-21 00:45:20 -0700
committer  Stephen Hines <srhines@google.com>    2014-07-21 00:45:20 -0700
commit     c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree       81b7dd2bb4370a392f31d332a566c903b5744764 /lib/Transforms/InstCombine
parent     19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
r212948 - fixes a small issue with atomic calls
Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib/Transforms/InstCombine')
 lib/Transforms/InstCombine/InstCombine.h                  |   6
 lib/Transforms/InstCombine/InstCombineAddSub.cpp          | 266
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp        |  23
 lib/Transforms/InstCombine/InstCombineCalls.cpp           |  40
 lib/Transforms/InstCombine/InstCombineCasts.cpp           |  26
 lib/Transforms/InstCombine/InstCombineCompares.cpp        |  82
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 153
 lib/Transforms/InstCombine/InstCombineMulDivRem.cpp       |  11
 lib/Transforms/InstCombine/InstCombineSelect.cpp          | 102
 lib/Transforms/InstCombine/InstCombineShifts.cpp          |   5
 lib/Transforms/InstCombine/InstCombineVectorOps.cpp       |   8
 lib/Transforms/InstCombine/InstructionCombining.cpp       | 310
 12 files changed, 702 insertions(+), 330 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index e04b1be..ab4dc1c 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -37,8 +37,9 @@ enum SelectPatternFlavor { SPF_SMIN, SPF_UMIN, SPF_SMAX, - SPF_UMAX - // SPF_ABS - TODO. + SPF_UMAX, + SPF_ABS, + SPF_NABS }; /// getComplexity: Assign a complexity or rank value to LLVM Values... @@ -246,6 +247,7 @@ private: bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); + bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c37a9cf..99f0f1f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -865,69 +865,170 @@ Value *FAddCombine::createAddendVal return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { - if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return nullptr; - - Instruction *I = dyn_cast<Instruction>(V); - if (!I) return nullptr; - - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast<Constant>(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast<Constant>(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); - return I->getOperand(0); - } - return nullptr; +// If one of the operands only has one non-zero bit, and if the other +// operand has a known-zero bit in a more significant place than it (not +// including the sign bit) the ripple may go up to and fill the zero, but +// won't change the sign. For example, (X & ~4) + 1. +static bool checkRippleForAdd(const APInt &Op0KnownZero, + const APInt &Op1KnownZero) { + APInt Op1MaybeOne = ~Op1KnownZero; + // Make sure that one of the operand has at most one bit set to 1. + if (Op1MaybeOne.countPopulation() != 1) + return false; + + // Find the most significant known 0 other than the sign bit. + int BitWidth = Op0KnownZero.getBitWidth(); + APInt Op0KnownZeroTemp(Op0KnownZero); + Op0KnownZeroTemp.clearBit(BitWidth - 1); + int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; + + int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; + assert(Op1OnePosition >= 0); + + // This also covers the case of no known zero, since in that case + // Op0ZeroPosition is -1. + return Op0ZeroPosition >= Op1OnePosition; } - /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. +/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. 
Here are some simple // ones. - // Add has the property that adding any two 2's complement numbers can only - // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values - // will sign extend fine. + // If LHS and RHS each have at least two sign bits, the addition will look + // like + // + // XX..... + + // YY..... + // + // If the carry into the most significant position is 0, X and Y can't both + // be 1 and therefore the carry out of the addition is also 0. + // + // If the carry into the most significant position is 1, X and Y can't both + // be 0 and therefore the carry out of the addition is also 1. + // + // Since the carry into the most significant position is always equal to + // the carry out of the addition, there is no signed overflow. if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; + if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) { + int BitWidth = IT->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + + // Addition of two 2's compliment numbers having opposite signs will never + // overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + return true; + + // Check if carry bit of addition will not cause overflow. + if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) + return true; + if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) + return true; + } + return false; +} - // TODO: Implement. +/// WillNotOverflowUnsignedAdd - Return true if we can prove that: +/// (zext (add LHS, RHS)) === (add (zext LHS), (zext RHS)) +bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here is a simple one. + // If the sign bit of LHS and that of RHS are both zero, no unsigned wrap. + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0); + if (LHSKnownNonNegative && RHSKnownNonNegative) + return true; return false; } -Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyAssociativeOrCommutative(I); +// Checks if any operand is negative and we can convert add to sub. +// This function checks for following negative patterns +// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C)) +// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) +// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even +static Value *checkForNegativeOperand(BinaryOperator &I, + InstCombiner::BuilderTy *Builder) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); + // This function creates 2 instructions to replace ADD, we need at least one + // of LHS or RHS to have one use to ensure benefit in transform. 
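
As a standalone sanity check (not part of the patch), the first two identities that checkForNegativeOperand relies on can be verified exhaustively at 8 bits, assuming nothing beyond the algebra stated in the comments above:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned z = 0; z < 256; ++z) {
        for (unsigned c = 0; c < 256; ++c) {
          uint8_t Z = (uint8_t)z, C = (uint8_t)c;
          // ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
          assert((uint8_t)(((Z | (uint8_t)~C) ^ C) + 1) == (uint8_t)-(Z & C));
          // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
          assert((uint8_t)(((Z & C) ^ C) + 1) == (uint8_t)-(Z | (uint8_t)~C));
        }
      }
      return 0;
    }
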
+ if (!LHS->hasOneUse() && !RHS->hasOneUse()) + return nullptr; - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL)) - return ReplaceInstUsesWith(I, V); + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + const APInt *C1 = nullptr, *C2 = nullptr; + + // if ONE is on other side, swap + if (match(RHS, m_Add(m_Value(X), m_One()))) + std::swap(LHS, RHS); + + if (match(LHS, m_Add(m_Value(X), m_One()))) { + // if XOR on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(X, RHS); + + if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { + // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) + if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { + Value *NewAnd = Builder->CreateAnd(Z, *C1); + return Builder->CreateSub(RHS, NewAnd, "sub"); + } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { + // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) + Value *NewOr = Builder->CreateOr(Z, ~(*C1)); + return Builder->CreateSub(RHS, NewOr, "sub"); + } + } + } + + // Restore LHS and RHS + LHS = I.getOperand(0); + RHS = I.getOperand(1); + + // if XOR is on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(LHS, RHS); + + // C2 is ODD + // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2)) + // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) + if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) + if (C1->countTrailingZeros() == 0) + if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { + Value *NewOr = Builder->CreateOr(Z, ~(*C2)); + return Builder->CreateSub(RHS, NewOr, "sub"); + } + return nullptr; +} + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); - // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), DL)) + return ReplaceInstUsesWith(I, V); + + // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return ReplaceInstUsesWith(I, V); @@ -1025,23 +1126,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); - - { - Constant *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - Constant *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - } + if (Value *V = checkForNegativeOperand(I, Builder)) + return ReplaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. 
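
The rule in the preceding comment is also easy to confirm outside the compiler: with no bits set in common there are no carries, so each result bit is a plain OR. A standalone exhaustive sketch at 8 bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          if ((a & b) == 0)                       // no common set bits
            assert((uint8_t)(a + b) == (a | b));  // A+B == A|B
      return 0;
    }
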
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { @@ -1059,29 +1145,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - // W*X + Y*Z --> W * (X+Z) iff W == Y - { - Value *W, *X, *Y, *Z; - if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && - match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { - if (W != Y) { - if (W == Z) { - std::swap(Y, Z); - } else if (Y == X) { - std::swap(W, X); - } else if (X == Z) { - std::swap(Y, Z); - std::swap(W, X); - } - } - - if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); - return BinaryOperator::CreateMul(W, NewAdd); - } - } - } - if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Value *X; if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X @@ -1191,6 +1254,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); } + // TODO(jingyue): Consider WillNotOverflowSignedAdd and + // WillNotOverflowUnsignedAdd to reduce the number of invocations of + // computeKnownBits. + if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS)) { + Changed = true; + I.setHasNoSignedWrap(true); + } + if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS)) { + Changed = true; + I.setHasNoUnsignedWrap(true); + } + return Changed ? &I : nullptr; } @@ -1478,9 +1553,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(Y, Y->getName() + ".not")); - // 0 - (X sdiv C) -> (X sdiv -C) - if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && - match(Op0, m_Zero())) + // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && + !C->isMinSignedValue()) return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); // 0 - (X << Y) -> (-X << Y) when X is freely negatable. @@ -1488,19 +1563,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Value *XNeg = dyn_castNegVal(X)) return BinaryOperator::CreateShl(XNeg, Y); - // X - X*C --> X * (1-C) - if (match(Op1, m_Mul(m_Specific(Op0), m_Constant(CI)))) { - Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); - return BinaryOperator::CreateMul(Op0, CP1); - } - - // X - X<<C --> X * (1-(1<<C)) - if (match(Op1, m_Shl(m_Specific(Op0), m_Constant(CI)))) { - Constant *One = ConstantInt::get(I.getType(), 1); - C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); - return BinaryOperator::CreateMul(Op0, C); - } - // X - A*-B -> X + A*B // X - -A*B -> X + A*B Value *A, *B; @@ -1517,16 +1579,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - Constant *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - Constant *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". 
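
At the source level, the pattern in question is the familiar C idiom below (hypothetical example, not taken from the patch):

    #include <cassert>

    int main() {
      int A[16];
      // The IR for this difference is roughly (ptrtoint - ptrtoint) scaled
      // by the element size; OptimizePointerDifference folds it to 10.
      assert(&A[10] - &A[0] == 10);
      return 0;
    }
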
if (DL) { @@ -1541,7 +1593,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); - } + } return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4f5d65a..b23a606 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4b583b..658178d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -421,6 +421,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } + + // We can strength reduce reduce this signed add into a regular add if we + // can prove that it will never overflow. + if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + if (WillNotOverflowSignedAdd(LHS, RHS)) { + Value *Add = Builder->CreateNSWAdd(LHS, RHS); + Add->takeName(&CI); + Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()}; + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Add, 0); + } + } + break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: @@ -800,6 +815,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // Note that ppc_altivec_vperm has a big-endian bias, so when creating + // a vectorshuffle for little endian, we must undo the transformation + // performed on vec_perm in altivec.h. That is, we must complement + // the permutation mask with respect to 31 and reverse the order of + // V1 and V2. 
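
A minimal standalone sketch of the index arithmetic this describes, with plain byte arrays standing in for the two vector operands (the helper name is illustrative, not an LLVM API):

    #include <cassert>
    #include <cstdint>

    // Mirror the transformed mask handling: mask to 5 bits, complement with
    // respect to 31 on little-endian targets, and swap the roles of the two
    // source vectors.
    static uint8_t vpermByte(const uint8_t V1[16], const uint8_t V2[16],
                             unsigned Idx, bool LittleEndian) {
      Idx &= 31;                                 // match hardware behavior
      if (LittleEndian)
        Idx = 31 - Idx;
      const uint8_t *First  = LittleEndian ? V2 : V1;  // operands reversed
      const uint8_t *Second = LittleEndian ? V1 : V2;
      return (Idx < 16 ? First : Second)[Idx & 15];
    }

    int main() {
      uint8_t A[16] = {0}, B[16] = {0};
      A[3] = 0xAA;
      // On a big-endian target, mask element 3 selects byte 3 of V1.
      assert(vpermByte(A, B, 3, /*LittleEndian=*/false) == 0xAA);
      return 0;
    }
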
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { assert(Mask->getType()->getVectorNumElements() == 16 && "Bad type for intrinsic!"); @@ -832,10 +852,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned Idx = cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. + if (DL && DL->isLittleEndian()) + Idx = 31 - Idx; if (!ExtractedElts[Idx]) { + Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, Builder->getInt32(Idx&15)); } @@ -913,6 +937,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::AMDGPU_rcp: { + if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) { + const APFloat &ArgVal = C->getValueAPF(); + APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat::opStatus Status = Val.divide(ArgVal, + APFloat::rmNearestTiesToEven); + // Only do this if it was exact and therefore not dependent on the + // rounding mode. + if (Status == APFloat::opOK) + return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + } + + break; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 356803a..ff083d7 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1434,7 +1434,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { + if (GEP->hasAllZeroIndices() && + // If CI is an addrspacecast and GEP changes the poiner type, merging + // GEP into CI would undo canonicalizing addrspacecast with different + // pointer types, causing infinite loops. + (!isa<AddrSpaceCastInst>(CI) || + GEP->getType() == GEP->getPointerOperand()->getType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. @@ -1904,5 +1909,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + // If the destination pointer element type is not the the same as the source's + // do the addrspacecast to the same type, and then the bitcast in the new + // address space. This allows the cast to be exposed to other transforms. + Value *Src = CI.getOperand(0); + PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType()); + PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType()); + + Type *DestElemTy = DestTy->getElementType(); + if (SrcTy->getElementType() != DestElemTy) { + Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace()); + if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) { + // Handle vectors of pointers. 
+ MidTy = VectorType::get(MidTy, VT->getNumElements()); + } + + Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + return new AddrSpaceCastInst(NewBitCast, CI.getType()); + } + return commonPointerCastTransforms(CI); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 02e8bf1..5e71c5c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -612,9 +612,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (ICmpInst::isSigned(Cond)) return nullptr; - // Look through bitcasts. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) - RHS = BCI->getOperand(0); + // Look through bitcasts and addrspacecasts. We do not however want to remove + // 0 GEPs. + if (!isa<GetElementPtrInst>(RHS)) + RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { @@ -655,9 +656,24 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && PtrBase->stripPointerCasts() == GEPRHS->getOperand(0)->stripPointerCasts()) { + Value *LOffset = EmitGEPOffset(GEPLHS); + Value *ROffset = EmitGEPOffset(GEPRHS); + + // If we looked through an addrspacecast between different sized address + // spaces, the LHS and RHS pointers are different sized + // integers. Truncate to the smaller one. + Type *LHSIndexTy = LOffset->getType(); + Type *RHSIndexTy = ROffset->getType(); + if (LHSIndexTy != RHSIndexTy) { + if (LHSIndexTy->getPrimitiveSizeInBits() < + RHSIndexTy->getPrimitiveSizeInBits()) { + ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); + } else + LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); + } + Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), - EmitGEPOffset(GEPLHS), - EmitGEPOffset(GEPRHS)); + LOffset, ROffset); return ReplaceInstUsesWith(I, Cmp); } @@ -667,26 +683,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // If one of the GEPs has all zero indices, recurse. - bool AllZeros = true; - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (!isa<Constant>(GEPLHS->getOperand(i)) || - !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) + if (GEPLHS->hasAllZeroIndices()) return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), ICmpInst::getSwappedPredicate(Cond), I); // If the other GEP has all zero indices, recurse. - AllZeros = true; - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (!isa<Constant>(GEPRHS->getOperand(i)) || - !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) + if (GEPRHS->hasAllZeroIndices()) return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); @@ -2026,9 +2028,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, /// replacement required. static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, Value *OtherVal, InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. 
+ if (!isa<IntegerType>(MulVal->getType())) + return nullptr; + assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); - assert(isa<IntegerType>(MulVal->getType())); Instruction *MulInstr = cast<Instruction>(MulVal); assert(MulInstr->getOpcode() == Instruction::Mul); @@ -2523,7 +2529,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + if (~Op1KnownZero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; ConstantInt *LHSC = nullptr; @@ -2533,11 +2539,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". + // or turn "((1 << x)&7) == 0" into "x > 2". Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { - unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); - return new ICmpInst(ICmpInst::ICMP_NE, X, - ConstantInt::get(X->getType(), CmpVal)); + APInt ValToCheck = Op0KnownZeroInverted; + if (ValToCheck.isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } else if ((++ValToCheck).isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros() - 1; + return new ICmpInst(ICmpInst::ICMP_UGT, X, + ConstantInt::get(X->getType(), CmpVal)); + } } // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, @@ -2560,7 +2574,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + if (~Op1KnownZero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; ConstantInt *LHSC = nullptr; @@ -2570,11 +2584,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". + // or turn "((1 << x)&7) != 0" into "x < 3". Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { - unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); - return new ICmpInst(ICmpInst::ICMP_EQ, X, - ConstantInt::get(X->getType(), CmpVal)); + APInt ValToCheck = Op0KnownZeroInverted; + if (ValToCheck.isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } else if ((++ValToCheck).isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_ULT, X, + ConstantInt::get(X->getType(), CmpVal)); + } } // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 66d0938..c10e92a 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -50,99 +50,102 @@ static bool pointsToConstantGlobal(Value *V) { /// can optimize this. 
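
The rewrite of this function that follows replaces self-recursion over the use graph with an explicit worklist of (value, is-offset) pairs. A generic standalone sketch of that conversion pattern, with simplified stand-in types rather than LLVM's:

    #include <cassert>
    #include <utility>
    #include <vector>

    struct Node {
      std::vector<Node *> Users;
      bool AddsOffset;  // e.g. a GEP with a non-zero index
    };

    // Iterative form: pop (node, flag) pairs instead of recursing, which
    // keeps stack depth constant on arbitrarily deep use chains.
    static bool allUsesSafe(Node *Root) {
      std::vector<std::pair<Node *, bool>> Worklist;
      Worklist.push_back(std::make_pair(Root, false));
      while (!Worklist.empty()) {
        std::pair<Node *, bool> Cur = Worklist.back();
        Worklist.pop_back();
        for (Node *U : Cur.first->Users) {
          bool IsOffset = Cur.second || U->AddsOffset;
          if (U->Users.empty() && IsOffset)  // stand-in for a rejecting use
            return false;
          Worklist.push_back(std::make_pair(U, IsOffset));
        }
      }
      return true;
    }

    int main() {
      Node Leaf = {{}, false};
      Node Gep  = {{&Leaf}, true};   // offsets the pointer
      Node Root = {{&Gep}, false};
      assert(!allUsesSafe(&Root));   // the offset reaches a leaf use
      return 0;
    }
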
static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - SmallVectorImpl<Instruction *> &ToDelete, - bool IsOffset = false) { + SmallVectorImpl<Instruction *> &ToDelete) { // We track lifetime intrinsics as we encounter them. If we decide to go // ahead and replace the value with the global, this lets the caller quickly // eliminate the markers. - for (Use &U : V->uses()) { - Instruction *I = cast<Instruction>(U.getUser()); - - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } - - if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal( - GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) - return false; - continue; - } + SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect; + ValuesToInspect.push_back(std::make_pair(V, false)); + while (!ValuesToInspect.empty()) { + auto ValuePair = ValuesToInspect.pop_back_val(); + const bool IsOffset = ValuePair.second; + for (auto &U : ValuePair.first->uses()) { + Instruction *I = cast<Instruction>(U.getUser()); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } - if (CallSite CS = I) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(&U)) + if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { + // If uses of the bitcast are ok, we are ok. + ValuesToInspect.push_back(std::make_pair(I, IsOffset)); continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + ValuesToInspect.push_back( + std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices())); + continue; + } - // Inalloca arguments are clobbered by the call. - unsigned ArgNo = CS.getArgumentNo(&U); - if (CS.isInAllocaArgument(ArgNo)) - return false; + if (CallSite CS = I) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(&U)) + continue; - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can - // ignore it if we know that the value isn't captured. - if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) - continue; + // Inalloca arguments are clobbered by the call. + unsigned ArgNo = CS.getArgumentNo(&U); + if (CS.isInAllocaArgument(ArgNo)) + return false; - // If this is being passed as a byval argument, the caller is making a - // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) - continue; - } + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. 
+ if (CS.isByValArgument(ArgNo)) + continue; + } - // Lifetime intrinsics can be handled by the caller. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - assert(II->use_empty() && "Lifetime markers have no result to use!"); - ToDelete.push_back(II); - continue; + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } } - } - // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. - MemTransferInst *MI = dyn_cast<MemTransferInst>(I); - if (!MI) - return false; + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast<MemTransferInst>(I); + if (!MI) + return false; - // If the transfer is using the alloca as a source of the transfer, then - // ignore it since it is a load (unless the transfer is volatile). - if (U.getOperandNo() == 1) { - if (MI->isVolatile()) return false; - continue; - } + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (U.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } - // If we already have seen a copy, reject the second one. - if (TheCopy) return false; + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; - // If the pointer has been offset from the start of the alloca, we can't - // safely handle this. - if (IsOffset) return false; + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; - // If the memintrinsic isn't using the alloca as the dest, reject it. - if (U.getOperandNo() != 0) return false; + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (U.getOperandNo() != 0) return false; - // If the source of the memcpy/move is not a constant global, reject it. - if (!pointsToConstantGlobal(MI->getSource())) - return false; + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } } return true; } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9996ebc..6c6e7d8 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Add = Builder->CreateMul(X, Op1); - return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1)); + Value *Mul = Builder->CreateMul(C1, Op1); + // Only go forward with the transform if C1*CI simplifies to a tidier + // constant. 
+ if (!match(Mul, m_Mul(m_Value(), m_Value()))) + return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); } } } @@ -990,6 +993,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } if (Constant *RHS = dyn_cast<Constant>(Op1)) { + // X/INT_MIN -> X == INT_MIN + if (RHS->isMinSignedValue()) + return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType()); + // -X/C --> X/-C provided the negation doesn't overflow. if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9a41e4b..06c9e29 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -31,13 +31,18 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); if (!ICI) return SPF_UNKNOWN; - LHS = ICI->getOperand(0); - RHS = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + LHS = CmpLHS; + RHS = CmpRHS; // (icmp X, Y) ? X : Y - if (SI->getTrueValue() == ICI->getOperand(0) && - SI->getFalseValue() == ICI->getOperand(1)) { - switch (ICI->getPredicate()) { + if (TrueVal == CmpLHS && FalseVal == CmpRHS) { + switch (Pred) { default: return SPF_UNKNOWN; // Equality. case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return SPF_UMAX; @@ -51,18 +56,35 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { } // (icmp X, Y) ? Y : X - if (SI->getTrueValue() == ICI->getOperand(1) && - SI->getFalseValue() == ICI->getOperand(0)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; + if (TrueVal == CmpRHS && FalseVal == CmpLHS) { + switch (Pred) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) { + if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) || + (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) { + + // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X + // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X + if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { + return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS; + } + + // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X + // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X + if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) { + return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS; + } } } @@ -365,7 +387,15 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. 
The magnitude of C2 and C1 are flipped -static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, +/// +/// This also tries to turn +/// --- Single bit tests: +/// if ((x & C) == 0) x |= C to x |= C +/// if ((x & C) != 0) x ^= C to x &= ~C +/// if ((x & C) == 0) x ^= C to x |= C +/// if ((x & C) != 0) x &= ~C to x &= ~C +/// if ((x & C) == 0) x &= ~C to nothing +static Value *foldSelectICmpAndOr(SelectInst &SI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); @@ -384,6 +414,25 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, return nullptr; const APInt *C2; + if (match(TrueVal, m_Specific(X))) { + // if ((X & C) != 0) X ^= C becomes X &= ~C + if (match(FalseVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2) + return Builder->CreateAnd(X, ~(*C1)); + // if ((X & C) != 0) X &= ~C becomes X &= ~C + if (match(FalseVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2)) + return FalseVal; + } else if (match(FalseVal, m_Specific(X))) { + // if ((X & C) == 0) X ^= C becomes X |= C + if (match(TrueVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2) + return Builder->CreateOr(X, *C1); + // if ((X & C) == 0) X &= ~C becomes nothing + if (match(TrueVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2)) + return X; + // if ((X & C) == 0) X |= C becomes X |= C + if (match(TrueVal, m_Or(m_Specific(X), m_APInt(C2))) && C1 == C2) + return TrueVal; + } + bool OrOnTrueVal = false; bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); if (!OrOnFalseVal) @@ -677,6 +726,22 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, } } } + + // ABS(ABS(X)) -> ABS(X) + // NABS(NABS(X)) -> NABS(X) + if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { + return ReplaceInstUsesWith(Outer, Inner); + } + + // ABS(NABS(X)) -> ABS(X) + // NABS(ABS(X)) -> NABS(X) + if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || + (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { + SelectInst *SI = cast<SelectInst>(Inner); + Value *NewSI = Builder->CreateSelect( + SI->getCondition(), SI->getFalseValue(), SI->getTrueValue()); + return ReplaceInstUsesWith(Outer, NewSI); + } return nullptr; } @@ -981,7 +1046,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // TODO. // ABS(-X) -> ABS(X) - // ABS(ABS(X)) -> ABS(X) } // See if we can fold the select into a phi node if the condition is a select. diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc6665c..2495747 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the left shift is just shifting out partial signbits, delete the - // extension. - if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) - return ReplaceInstUsesWith(I, X); - // If the input is an extension from the shifted amount value, e.g. 
// %x = zext i8 %A to i32 // %y = shl i32 %x, 24 diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 8c5e202..cb16584 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,7 +144,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // If the operand is the PHI induction variable: if (PHIInVal == PHIUser) { // Scalarize the binary operation. Its first operand is the - // scalar PHI and the second operand is extracted from the other + // scalar PHI, and the second operand is extracted from the other // vector operand. BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; @@ -361,7 +361,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. - // Okay, we can handle this if the vector we are insertinting into is + // We can handle this if the vector we are inserting into is // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted undef. @@ -376,7 +376,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { - // Okay, we can handle this if the vector we are insertinting into is + // We can handle this if the vector we are inserting into is // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. @@ -403,7 +403,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// We are building a shuffle to create V, which is a sequence of insertelement, /// extractelement pairs. If PermittedRHS is set, then we must either use it or -/// not rely on the second vector source. Return an std::pair containing the +/// not rely on the second vector source. Return a std::pair containing the /// left and right vectors of the proposed shuffle (or 0), and set the Mask /// parameter as required. /// diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4c36887..08e2446 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -395,6 +396,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, return false; } +/// This function returns identity value for given opcode, which can be used to +/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). +static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) { + if (isa<Constant>(V)) + return nullptr; + + if (OpCode == Instruction::Mul) + return ConstantInt::get(V->getType(), 1); + + // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc. + + return nullptr; +} + +/// This function factors binary ops which can be combined using distributive +/// laws. This also factor SHL as MUL e.g. 
SHL(X, 2) ==> MUL(X, 4). +static Instruction::BinaryOps +getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) { + if (!Op) + return Instruction::BinaryOpsEnd; + + if (Op->getOpcode() == Instruction::Shl) { + if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) { + // The multiplier is really 1 << CST. + RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST); + LHS = Op->getOperand(0); + return Instruction::Mul; + } + } + + // TODO: We can add other conversions e.g. shr => div etc. + + LHS = Op->getOperand(0); + RHS = Op->getOperand(1); + return Op->getOpcode(); +} + +/// This tries to simplify binary operations by factorizing out common terms +/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). +static Value *tryFactorization(InstCombiner::BuilderTy *Builder, + const DataLayout *DL, BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, Value *A, + Value *B, Value *C, Value *D) { + + // If any of A, B, C, D are null, we can not factor I, return early. + // Checking A and C should be enough. + if (!A || !C || !B || !D) + return nullptr; + + Value *SimplifiedInst = nullptr; + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); + + // Does "X op' Y" always equal "Y op' X"? + bool InnerCommutative = Instruction::isCommutative(InnerOpcode); + + // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? + if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (InnerCommutative && A == D)) { + if (A != C) + std::swap(C, D); + // Consider forming "A op' (B op D)". + // If "B op D" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + // If "B op D" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + } + } + + // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? + if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + } + } + + if (SimplifiedInst) { + ++NumFactor; + SimplifiedInst->takeName(&I); + + // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. + // TODO: Check for NUW. 
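
tryFactorization's correctness rests on distributivity, which holds bit-for-bit under wrapping arithmetic and is easy to spot-check at 8 bits; a standalone sketch that also exercises the shl-as-mul view used by getBinOpsForFactorization:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          // (A*B) + (A*4) == A*(B+4): factoring out the common term A.
          assert((uint8_t)(a * b + a * 4) == (uint8_t)(a * (b + 4)));
          // The "A << 2" operand is first rewritten as "A * 4".
          assert((uint8_t)(a << 2) == (uint8_t)(a * 4));
        }
      return 0;
    }
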
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) { + if (isa<OverflowingBinaryOperator>(SimplifiedInst)) { + bool HasNSW = false; + if (isa<OverflowingBinaryOperator>(&I)) + HasNSW = I.hasNoSignedWrap(); + + if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) + if (isa<OverflowingBinaryOperator>(Op0)) + HasNSW &= Op0->hasNoSignedWrap(); + + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS)) + if (isa<OverflowingBinaryOperator>(Op1)) + HasNSW &= Op1->hasNoSignedWrap(); + BO->setHasNoSignedWrap(HasNSW); + } + } + } + return SimplifiedInst; +} + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations /// which some other binary operation distributes over either by factorizing /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this @@ -404,65 +526,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); - Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op // Factorization. - if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; + Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B); + Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (LHSOpcode == RHSOpcode) { + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + return V; + } - // Does "X op' Y" always equal "Y op' X"? - bool InnerCommutative = Instruction::isCommutative(InnerOpcode); - - // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? - if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (InnerCommutative && A == D)) { - if (A != C) - std::swap(C, D); - // Consider forming "A op' (B op D)". - // If "B op D" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); - // If "B op D" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, A, V); - V->takeName(&I); - return V; - } - } + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, + getIdentityValue(LHSOpcode, RHS))) + return V; - // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? - if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (InnerCommutative && B == C)) { - if (B != D) - std::swap(C, D); - // Consider forming "(A op C) op' B". - // If "A op C" simplifies then it can be formed with no cost. 
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); - // If "A op C" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, V, B); - V->takeName(&I); - return V; - } - } - } + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, + getIdentityValue(RHSOpcode, LHS), C, D)) + return V; // Expansion. + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. @@ -1030,6 +1120,12 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { return nullptr; } + // If Op is zero then Val = Op * Scale. + if (match(Op, m_Zero())) { + NoSignedWrap = true; + return Op; + } + // We know that we can successfully descale, so from here on we can safely // modify the IR. Op holds the descaled version of the deepest term in the // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known @@ -1106,6 +1202,11 @@ static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (!Inst.getType()->isVectorTy()) return nullptr; + // It may not be safe to reorder shuffles and things like div, urem, etc. + // because we may trap when executing those ops on unknown vector elements. + // See PR20059. + if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr; + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); @@ -1138,7 +1239,9 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS); if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS); if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS); - if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) && + if (Shuffle && C1 && + (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) && + isa<UndefValue>(Shuffle->getOperand(1)) && Shuffle->getType() == Shuffle->getOperand(0)->getType()) { SmallVector<int, 16> ShMask = Shuffle->getShuffleMask(); // Find constant C2 that has property: @@ -1220,6 +1323,91 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (MadeChange) return &GEP; } + // Check to see if the inputs to the PHI node are getelementptr instructions. + if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) { + GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0)); + if (!Op1) + return nullptr; + + signed DI = -1; + + for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I); + if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) + return nullptr; + + // Keep track of the type as we walk the GEP. 
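
In C-level terms, the PHI-of-GEPs merge being implemented here is roughly the following source-to-source rewrite (illustrative code, not the patch itself):

    #include <cassert>
    #include <cstddef>

    int loadEither(int *Base, size_t I, size_t J, bool Cond) {
      // Before: two GEPs feed the merge point (a PHI in IR terms).
      int *P = Cond ? &Base[I] : &Base[J];
      // After: one PHI over the single differing index, one cloned GEP.
      size_t Idx = Cond ? I : J;
      int *Q = &Base[Idx];
      assert(P == Q);  // the addresses agree on every path
      return *Q;
    }

    int main() {
      int A[4] = {10, 20, 30, 40};
      assert(loadEither(A, 1, 2, true) == 20);
      assert(loadEither(A, 1, 2, false) == 30);
      return 0;
    }
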
+ Type *CurTy = Op1->getOperand(0)->getType()->getScalarType(); + + for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { + if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) + return nullptr; + + if (Op1->getOperand(J) != Op2->getOperand(J)) { + if (DI == -1) { + // We have not seen any differences yet in the GEPs feeding the + // PHI yet, so we record this one if it is allowed to be a + // variable. + + // The first two arguments can vary for any GEP, the rest have to be + // static for struct slots + if (J > 1 && CurTy->isStructTy()) + return nullptr; + + DI = J; + } else { + // The GEP is different by more than one input. While this could be + // extended to support GEPs that vary by more than one variable it + // doesn't make sense since it greatly increases the complexity and + // would result in an R+R+R addressing mode which no backend + // directly supports and would need to be broken into several + // simpler instructions anyway. + return nullptr; + } + } + + // Sink down a layer of the type for the next iteration. + if (J > 0) { + if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) { + CurTy = CT->getTypeAtIndex(Op1->getOperand(J)); + } else { + CurTy = nullptr; + } + } + } + } + + GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone()); + + if (DI == -1) { + // All the GEPs feeding the PHI are identical. Clone one down into our + // BB so that it can be merged with the current GEP. + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + } else { + // All the GEPs feeding the PHI differ at a single offset. Clone a GEP + // into the current block so it can be merged, and create a new PHI to + // set that index. + Instruction *InsertPt = Builder->GetInsertPoint(); + Builder->SetInsertPoint(PN); + PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + Builder->SetInsertPoint(InsertPt); + + for (auto &I : PN->operands()) + NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI), + PN->getIncomingBlock(I)); + + NewGEP->setOperand(DI, NewPN); + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + NewGEP->setOperand(DI, NewPN); + } + + GEP.setOperand(0, NewGEP); + PtrOp = NewGEP; + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. @@ -2014,7 +2202,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // Simplify the list of clauses, eg by removing repeated catch clauses // (these are often created by inlining). bool MakeNewInstruction = false; // If true, recreate using the following: - SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. @@ -2022,8 +2210,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { bool isLastClause = i + 1 == e; if (LI.isCatch(i)) { // A catch clause. - Value *CatchClause = LI.getClause(i); - Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + Constant *CatchClause = LI.getClause(i); + Constant *TypeInfo = CatchClause->stripPointerCasts(); // If we already saw this clause, there is no point in having a second // copy of it. 
@@ -2052,7 +2240,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // equal (for example if one represents a C++ class, and the other some // class derived from it). assert(LI.isFilter(i) && "Unsupported landingpad clause!"); - Value *FilterClause = LI.getClause(i); + Constant *FilterClause = LI.getClause(i); ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); unsigned NumTypeInfos = FilterType->getNumElements(); @@ -2096,8 +2284,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // catch-alls. If so, the filter can be discarded. bool SawCatchAll = false; for (unsigned j = 0; j != NumTypeInfos; ++j) { - Value *Elt = Filter->getOperand(j); - Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + Constant *Elt = Filter->getOperand(j); + Constant *TypeInfo = Elt->stripPointerCasts(); if (isCatchAll(Personality, TypeInfo)) { // This element is a catch-all. Bail out, noting this fact. SawCatchAll = true; @@ -2202,7 +2390,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { continue; // If Filter is a subset of LFilter, i.e. every element of Filter is also // an element of LFilter, then discard LFilter. - SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j; + SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; // If Filter is empty then it is a subset of LFilter. if (!FElts) { // Discard LFilter. |
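
One last standalone check, for the ABS/NABS select patterns introduced in the InstCombineSelect.cpp hunk above: all four comparison forms agree, and the new FoldSPFofSPF rules ABS(ABS(X)) == ABS(X) and ABS(NABS(X)) == ABS(X) hold over the 8-bit range (checked in plain int arithmetic to sidestep the i8 wrap at -128):

    #include <cassert>

    int main() {
      for (int x = -128; x < 128; ++x) {
        int a1 = (x > 0) ? x : -x;   // (X >s 0)  ? X : -X
        int a2 = (x > -1) ? x : -x;  // (X >s -1) ? X : -X
        int a3 = (x < 0) ? -x : x;   // (X <s 0)  ? -X : X
        int a4 = (x < 1) ? -x : x;   // (X <s 1)  ? -X : X
        assert(a1 == a2 && a2 == a3 && a3 == a4);
        int nabs = (x > 0) ? -x : x; // NABS(X)
        assert(((a1 > 0) ? a1 : -a1) == a1);       // ABS(ABS(X)) == ABS(X)
        assert(((nabs > 0) ? nabs : -nabs) == a1); // ABS(NABS(X)) == ABS(X)
      }
      return 0;
    }
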