Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp           |  88
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp         |  18
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp            | 142
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp            | 148
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp         |  63
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineInternal.h           |  24
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  | 203
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp        |  30
-rw-r--r-- | lib/Transforms/InstCombine/InstCombinePHI.cpp              |   8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp           |  98
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp           |  25
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 148
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp        |   7
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp        | 258
14 files changed, 696 insertions, 564 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 752f79d..c608f84 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
 /// This basically requires proving that the add in the original type would not
 /// overflow to change the sign bit or have a carry out.
 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
-                                            Instruction *CxtI) {
+                                            Instruction &CxtI) {
   // There are different heuristics we can use for this.  Here are some simple
   // ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
   //
   // Since the carry into the most significant position is always equal to
   // the carry out of the addition, there is no signed overflow.
-  if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
-      ComputeNumSignBits(RHS, 0, CxtI) > 1)
+  if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+      ComputeNumSignBits(RHS, 0, &CxtI) > 1)
     return true;
 
   unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
   APInt LHSKnownZero(BitWidth, 0);
   APInt LHSKnownOne(BitWidth, 0);
-  computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+  computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
 
   APInt RHSKnownZero(BitWidth, 0);
   APInt RHSKnownOne(BitWidth, 0);
-  computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+  computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
 
   // Addition of two 2's complement numbers having opposite signs will never
   // overflow.
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
 /// overflow to change the sign bit or have a carry out.
 /// TODO: Handle this for Vectors.
 bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
-                                            Instruction *CxtI) {
+                                            Instruction &CxtI) {
   // If LHS and RHS each have at least two sign bits, the subtraction
   // cannot overflow.
-  if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
-      ComputeNumSignBits(RHS, 0, CxtI) > 1)
+  if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+      ComputeNumSignBits(RHS, 0, &CxtI) > 1)
     return true;
 
   unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
   APInt LHSKnownZero(BitWidth, 0);
   APInt LHSKnownOne(BitWidth, 0);
-  computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+  computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
 
   APInt RHSKnownZero(BitWidth, 0);
   APInt RHSKnownOne(BitWidth, 0);
-  computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+  computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
 
   // Subtraction of two 2's complement numbers having identical signs will
   // never overflow.
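[Editor's note: the "more than one sign bit" argument used by WillNotOverflowSignedAdd/Sub above can be sanity-checked outside of LLVM. The following standalone C++ sketch is an illustration added by the editor, not part of this patch; it exhaustively verifies that when two i8 operands each have at least two sign bits (i.e. lie in [-64, 63]), neither the addition nor the subtraction can overflow i8.]

// Brute-force check of the sign-bit overflow argument for 8-bit integers.
#include <cstdint>
#include <cstdio>

int main() {
  // Values with >= 2 sign bits in i8 are exactly those in [-64, 63].
  for (int a = -64; a <= 63; ++a)
    for (int b = -64; b <= 63; ++b) {
      int sum = a + b;   // computed in 'int', so no UB here
      int diff = a - b;
      if (sum < INT8_MIN || sum > INT8_MAX ||
          diff < INT8_MIN || diff > INT8_MAX) {
        std::printf("overflow: a=%d b=%d\n", a, b);
        return 1;
      }
    }
  std::puts("no i8 overflow when both operands have >= 2 sign bits");
  return 0;
}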
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
 /// \brief Return true if we can prove that:
 ///    (sub LHS, RHS) === (sub nuw LHS, RHS)
 bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
-                                              Instruction *CxtI) {
+                                              Instruction &CxtI) {
   // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
   bool LHSKnownNonNegative, LHSKnownNegative;
   bool RHSKnownNonNegative, RHSKnownNegative;
-  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
-  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+                 &CxtI);
+  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+                 &CxtI);
   if (LHSKnownNegative && RHSKnownNonNegative)
     return true;
 
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
 }
 
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
-  bool Changed = SimplifyAssociativeOrCommutative(I);
-  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  bool Changed = SimplifyAssociativeOrCommutative(I);
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 
-  if (Value *V = SimplifyVectorOp(I))
-    return ReplaceInstUsesWith(I, V);
+  if (Value *V = SimplifyVectorOp(I))
+    return ReplaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
-    return ReplaceInstUsesWith(I, V);
+  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+                                 I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+    return ReplaceInstUsesWith(I, V);
 
   // (A*B)+(A*C) -> A*(B+C) etc
   if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1243,7 +1245,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
         // Insert the new, smaller add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               CI, "addconv");
@@ -1256,10 +1258,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // Only do this if x/y have the same type, if at least one of them has a
       // single use (so we don't increase the number of sexts), and if the
       // integer add will not overflow.
-      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+      if (LHSConv->getOperand(0)->getType() ==
+              RHSConv->getOperand(0)->getType() &&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0), &I)) {
+                                   RHSConv->getOperand(0), I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1310,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   // TODO(jingyue): Consider WillNotOverflowSignedAdd and
   // WillNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+  if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
@@ -1371,7 +1374,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
         ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
       if (LHSConv->hasOneUse() &&
           ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
-          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               CI, "addconv");
@@ -1384,10 +1387,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
       // Only do this if x/y have the same type, if at least one of them has a
       // single use (so we don't increase the number of int->fp conversions),
       // and if the integer add will not overflow.
-      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+      if (LHSConv->getOperand(0)->getType() ==
+              RHSConv->getOperand(0)->getType() &&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
-                                   RHSConv->getOperand(0), &I)) {
+                                   RHSConv->getOperand(0), I)) {
         // Insert the new integer add.
         Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                               RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1440,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
 ///
 Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                                Type *Ty) {
-  assert(DL && "Must have target data info for this");
-
   // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
   // this.
   bool Swapped = false;
@@ -1662,26 +1664,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
-  if (DL) {
-    Value *LHSOp, *RHSOp;
-    if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
-        match(Op1, m_PtrToInt(m_Value(RHSOp))))
-      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
-        return ReplaceInstUsesWith(I, Res);
-
-    // trunc(p)-trunc(q) -> trunc(p-q)
-    if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
-        match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
-      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
-        return ReplaceInstUsesWith(I, Res);
-  }
+  Value *LHSOp, *RHSOp;
+  if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+      match(Op1, m_PtrToInt(m_Value(RHSOp))))
+    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+      return ReplaceInstUsesWith(I, Res);
+
+  // trunc(p)-trunc(q) -> trunc(p-q)
+  if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+      match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+      return ReplaceInstUsesWith(I, Res);
 
   bool Changed = false;
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+  if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
-  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoUnsignedWrap(true);
   }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 863eeaf..ee21c81 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -979,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 
   // Make a constant range that's the intersection of the two icmp ranges.
   // If the intersection is empty, we know that the result is false.
   ConstantRange LHSRange =
-      ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+      ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
   ConstantRange RHSRange =
-      ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+      ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
 
   if (LHSRange.intersectWith(RHSRange).isEmptySet())
     return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
@@ -1709,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
     Value *Mask = nullptr;
     Value *Masked = nullptr;
     if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
-        isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
-        isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+        isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+                               DT) &&
+        isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+                               DT)) {
       Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
       Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
     } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
-               isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
-                                      DT) &&
-               isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
-                                      DT)) {
+               isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+                                      CxtI, DT) &&
+               isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+                                      CxtI, DT)) {
       Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
       Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
     }
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05e7162..21243c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
@@ -61,8 +60,8 @@ static Type *reduceToSingleValueType(Type *T) {
 }
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
-  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
-  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment();
 
@@ -108,7 +107,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   if (StrippedDest != MI->getArgOperand(0)) {
     Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                     ->getElementType();
-    if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+    if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
       // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
       // down through these levels if so.
       SrcETy = reduceToSingleValueType(SrcETy);
@@ -156,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
 }
 
 Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
-  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
   if (MI->getAlignment() < Alignment) {
     MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                       Alignment, false));
@@ -198,6 +197,71 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   return nullptr;
 }
 
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+                                InstCombiner::BuilderTy &Builder) {
+  if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+    VectorType *VecTy = cast<VectorType>(II.getType());
+    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+    // The immediate permute control byte looks like this:
+    //    [1:0] - select 128 bits from sources for low half of destination
+    //    [2]   - ignore
+    //    [3]   - zero low half of destination
+    //    [5:4] - select 128 bits from sources for high half of destination
+    //    [6]   - ignore
+    //    [7]   - zero high half of destination
+
+    uint8_t Imm = CInt->getZExtValue();
+
+    bool LowHalfZero = Imm & 0x08;
+    bool HighHalfZero = Imm & 0x80;
+
+    // If both zero mask bits are set, this was just a weird way to
+    // generate a zero vector.
+    if (LowHalfZero && HighHalfZero)
+      return ZeroVector;
+
+    // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+    unsigned NumElts = VecTy->getNumElements();
+    unsigned HalfSize = NumElts / 2;
+    SmallVector<int, 8> ShuffleMask(NumElts);
+
+    // The high bit of the selection field chooses the 1st or 2nd operand.
+    bool LowInputSelect = Imm & 0x02;
+    bool HighInputSelect = Imm & 0x20;
+
+    // The low bit of the selection field chooses the low or high half
+    // of the selected operand.
+    bool LowHalfSelect = Imm & 0x01;
+    bool HighHalfSelect = Imm & 0x10;
+
+    // Determine which operand(s) are actually in use for this instruction.
+    Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+    Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+    // If needed, replace operands based on zero mask.
+    V0 = LowHalfZero ? ZeroVector : V0;
+    V1 = HighHalfZero ? ZeroVector : V1;
+
+    // Permute low half of result.
+    unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+    for (unsigned i = 0; i < HalfSize; ++i)
+      ShuffleMask[i] = StartIndex + i;
+
+    // Permute high half of result.
+    StartIndex = HighHalfSelect ? HalfSize : 0;
+    StartIndex += NumElts;
+    for (unsigned i = 0; i < HalfSize; ++i)
+      ShuffleMask[i + HalfSize] = StartIndex + i;
+
+    return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+  }
+  return nullptr;
+}
+
 /// visitCallInst - CallInst simplification.  This mostly only handles folding
 /// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
 /// the heavy lifting.
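[Editor's note: the immediate-byte decoding in SimplifyX86vperm2 above can be hard to follow from the bit masks alone. The standalone C++ sketch below is an editor's illustration, not part of this patch; it mirrors the same decode for a hypothetical 4 x double vector (NumElts = 4) and a sample immediate 0x31, printing the resulting shuffle mask. Indices 0..3 select from V0 and 4..7 from V1, matching CreateShuffleVector's two-operand convention.]

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumElts = 4, HalfSize = NumElts / 2;
  uint8_t Imm = 0x31; // sample immediate: low half from op0's high 128 bits,
                      // high half from op1's high 128 bits
  bool LowInputSelect = Imm & 0x02, HighInputSelect = Imm & 0x20;
  bool LowHalfSelect = Imm & 0x01, HighHalfSelect = Imm & 0x10;

  std::vector<int> Mask(NumElts);
  // Low half of the result comes from V0.
  unsigned Start = LowHalfSelect ? HalfSize : 0;
  for (unsigned i = 0; i < HalfSize; ++i)
    Mask[i] = Start + i;
  // High half of the result comes from V1 (offset by NumElts in the mask).
  Start = (HighHalfSelect ? HalfSize : 0) + NumElts;
  for (unsigned i = 0; i < HalfSize; ++i)
    Mask[i + HalfSize] = Start + i;

  std::printf("V0 = operand %d, V1 = operand %d, mask = %d %d %d %d\n",
              LowInputSelect ? 1 : 0, HighInputSelect ? 1 : 0,
              Mask[0], Mask[1], Mask[2], Mask[3]);
  return 0; // prints: V0 = operand 0, V1 = operand 1, mask = 2 3 6 7
}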
@@ -386,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // can prove that it will never overflow.
     if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
       Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-      if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
+      if (WillNotOverflowSignedAdd(LHS, RHS, *II)) {
         return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
       }
     }
@@ -407,11 +471,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       }
     }
     if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
-      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+      if (WillNotOverflowSignedSub(LHS, RHS, *II)) {
         return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
       }
     } else {
-      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) {
         return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
       }
     }
@@ -452,7 +516,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
       Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+      if (WillNotOverflowSignedMul(LHS, RHS, *II)) {
         return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
       }
     }
@@ -544,7 +608,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
         16) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
@@ -561,7 +625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
         16) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -578,7 +642,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::ppc_qpx_qvlfs:
     // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
        16) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
@@ -587,7 +651,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   case Intrinsic::ppc_qpx_qvlfd:
     // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
        32) {
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
@@ -596,7 +660,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   case Intrinsic::ppc_qpx_qvstfs:
     // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
        16) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -606,7 +670,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   case Intrinsic::ppc_qpx_qvstfd:
     // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
        32) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -618,7 +682,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_storeu_pd:
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
        16) {
       Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -735,9 +799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     unsigned LowHalfElts = VWidth / 2;
     APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
     APInt UndefElts(VWidth, 0);
-    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
-                                                 InputDemandedElts,
-                                                 UndefElts)) {
+    if (Value *TmpV = SimplifyDemandedVectorElts(
+            II->getArgOperand(0), InputDemandedElts, UndefElts)) {
       II->setArgOperand(0, TmpV);
       return II;
     }
@@ -906,6 +969,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     return ReplaceInstUsesWith(CI, Shuffle);
   }
 
+  case Intrinsic::x86_avx_vperm2f128_pd_256:
+  case Intrinsic::x86_avx_vperm2f128_ps_256:
+  case Intrinsic::x86_avx_vperm2f128_si_256:
+  case Intrinsic::x86_avx2_vperm2i128:
+    if (Value *V = SimplifyX86vperm2(*II, *Builder))
+      return ReplaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -945,12 +1016,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         unsigned Idx =
           cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
         Idx &= 31;  // Match the hardware behavior.
-        if (DL && DL->isLittleEndian())
+        if (DL.isLittleEndian())
           Idx = 31 - Idx;
 
         if (!ExtractedElts[Idx]) {
-          Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
-          Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+          Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+          Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
           ExtractedElts[Idx] =
             Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                           Builder->getInt32(Idx&15));
@@ -979,7 +1050,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::arm_neon_vst2lane:
   case Intrinsic::arm_neon_vst3lane:
   case Intrinsic::arm_neon_vst4lane: {
-    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
     unsigned AlignArg = II->getNumArgOperands() - 1;
     ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
     if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1118,7 +1189,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         RHS->getType()->isPointerTy() &&
         cast<Constant>(RHS)->isNullValue()) {
       LoadInst* LI = cast<LoadInst>(LHS);
-      if (isValidAssumeForContext(II, LI, DL, DT)) {
+      if (isValidAssumeForContext(II, LI, DT)) {
         MDNode *MD = MDNode::get(II->getContext(), None);
         LI->setMetadata(LLVMContext::MD_nonnull, MD);
         return EraseInstFromFunction(*II);
@@ -1192,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
 
 /// isSafeToEliminateVarargsCast - If this cast does not affect the value
 /// passed through the varargs area, we can eliminate the use of the cast.
 static bool isSafeToEliminateVarargsCast(const CallSite CS,
-                                         const CastInst * const CI,
-                                         const DataLayout * const DL,
+                                         const DataLayout &DL,
+                                         const CastInst *const CI,
                                          const int ix) {
   if (!CI->isLosslessCast())
     return false;
@@ -1217,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
   if (!SrcTy->isSized() || !DstTy->isSized())
     return false;
-  if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
     return false;
   return true;
 }
@@ -1226,7 +1297,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
 // Currently we're only working with the checking functions, memcpy_chk,
 // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
 // strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
   if (!CI->getCalledFunction()) return nullptr;
 
   auto InstCombineRAUW = [this](Instruction *From, Value *With) {
@@ -1391,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
            E = CS.arg_end(); I != E; ++I, ++ix) {
       CastInst *CI = dyn_cast<CastInst>(*I);
-      if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+      if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
         *I = CI->getOperand(0);
         Changed = true;
       }
@@ -1408,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   // this.  None of these calls are seen as possibly dead so go ahead and
   // delete the instruction now.
   if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
-    Instruction *I = tryOptimizeCall(CI, DL);
+    Instruction *I = tryOptimizeCall(CI);
     // If we changed something return the result, etc. Otherwise let
     // the fallthrough check.
     if (I) return EraseInstFromFunction(*I);
@@ -1487,7 +1558,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   //
   // into:
   //  call void @takes_i32_inalloca(i32* null)
-  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+  //
+  // Similarly, avoid folding away bitcasts of byval calls.
+  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
     return false;
 
   CallSite::arg_iterator AI = CS.arg_begin();
@@ -1512,12 +1586,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                          Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
-      if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
         return false;
 
       Type *CurElTy = ActTy->getPointerElementType();
-      if (DL->getTypeAllocSize(CurElTy) !=
-          DL->getTypeAllocSize(ParamPTy->getElementType()))
+      if (DL.getTypeAllocSize(CurElTy) !=
+          DL.getTypeAllocSize(ParamPTy->getElementType()))
         return false;
     }
   }
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3e2b719..fe544c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
 /// try to eliminate the cast by moving the type information into the alloc.
 Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                    AllocaInst &AI) {
-  // This requires DataLayout to get the alloca alignment and size information.
-  if (!DL) return nullptr;
-
   PointerType *PTy = cast<PointerType>(CI.getType());
 
   BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   Type *CastElTy = PTy->getElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
-  unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
-  unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+  unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+  unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
   if (CastElTyAlign < AllocElTyAlign) return nullptr;
 
   // If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   // same, we open the door to infinite loops of various kinds.
   if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
 
-  uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
-  uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
   if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
 
   // If the allocation has multiple uses, only promote it if we're not
   // shrinking the amount of memory being allocated.
-  uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
-  uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
   if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
 
   // See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
     PHINode *OPN = cast<PHINode>(I);
     PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
     for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
-      Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+      Value *V =
+          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
       NPN->addIncoming(V, OPN->getIncomingBlock(i));
     }
     Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
 /// This function is a wrapper around CastInst::isEliminableCastPair. It
 /// simply extracts arguments and returns what that function returns.
 static Instruction::CastOps
-isEliminableCastPair(
-  const CastInst *CI, ///< The first cast instruction
-  unsigned opcode,    ///< The opcode of the second cast instruction
-  Type *DstTy,        ///< The target type for the second cast instruction
-  const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+                     unsigned opcode,    ///< Opcode for the second cast
+                     Type *DstTy,        ///< Target type for the second cast
+                     const DataLayout &DL) {
   Type *SrcTy = CI->getOperand(0)->getType();   // A from above
   Type *MidTy = CI->getType();                  // B from above
 
   // Get the opcodes of the two Cast instructions
   Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
   Instruction::CastOps secondOp = Instruction::CastOps(opcode);
-  Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
-    DL->getIntPtrType(SrcTy) : nullptr;
-  Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
-    DL->getIntPtrType(MidTy) : nullptr;
-  Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
-    DL->getIntPtrType(DstTy) : nullptr;
+  Type *SrcIntPtrTy =
+      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+  Type *MidIntPtrTy =
+      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+  Type *DstIntPtrTy =
+      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
   unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                 DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                 DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
   // eliminate it now.
   if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
     if (Instruction::CastOps opc =
-        isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+            isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
       // The first cast (CSrc) is eliminable so we need to fix up or replace
       // the second cast (CI). CSrc will then have a good chance of being dead.
       return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
   if (isa<PHINode>(Src)) {
     // We don't do this if this would create a PHI node with an illegal type if
     // it is currently legal.
-    if (!Src->getType()->isIntegerTy() ||
-        !CI.getType()->isIntegerTy() ||
+    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
         ShouldChangeType(CI.getType(), Src->getType()))
       if (Instruction *NV = FoldOpIntoPhi(CI))
         return NV;
@@ -1419,18 +1413,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
   // If the source integer type is not the intptr_t type for this target, do a
   // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
   // cast to be exposed to other transforms.
-
-  if (DL) {
-    unsigned AS = CI.getAddressSpace();
-    if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
-        DL->getPointerSizeInBits(AS)) {
-      Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
-      if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
-        Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
-      Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
-      return new IntToPtrInst(P, CI.getType());
-    }
+  unsigned AS = CI.getAddressSpace();
+  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+      DL.getPointerSizeInBits(AS)) {
+    Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+    if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+      Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+    Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+    return new IntToPtrInst(P, CI.getType());
   }
 
   if (Instruction *I = commonCastTransforms(CI))
@@ -1460,32 +1451,33 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
       return &CI;
     }
 
-    if (!DL)
-      return commonCastTransforms(CI);
-
     // If the GEP has a single use, and the base pointer is a bitcast, and the
     // GEP computes a constant offset, see if we can convert these three
     // instructions into fewer.  This typically happens with unions and other
    // non-type-safe code.
     unsigned AS = GEP->getPointerAddressSpace();
-    unsigned OffsetBits = DL->getPointerSizeInBits(AS);
+    unsigned OffsetBits = DL.getPointerSizeInBits(AS);
     APInt Offset(OffsetBits, 0);
     BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
-    if (GEP->hasOneUse() &&
-        BCI &&
-        GEP->accumulateConstantOffset(*DL, Offset)) {
+    if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) {
+      // FIXME: This is insufficiently tested - just a no-crash test
+      // (test/Transforms/InstCombine/2007-05-14-Crash.ll)
+      //
       // Get the base pointer input of the bitcast, and the type it points to.
       Value *OrigBase = BCI->getOperand(0);
       SmallVector<Value*, 8> NewIndices;
-      if (FindElementAtOffset(OrigBase->getType(),
-                              Offset.getSExtValue(),
+      if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(),
                               NewIndices)) {
+        // FIXME: This codepath is completely untested - could be unreachable
+        // for all I know.
         // If we were able to index down into an element, create the GEP
         // and bitcast the result. This eliminates one bitcast, potentially
         // two.
-        Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
-          Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
-          Builder->CreateGEP(OrigBase, NewIndices);
+        Value *NGEP = cast<GEPOperator>(GEP)->isInBounds()
+                          ? Builder->CreateInBoundsGEP(OrigBase, NewIndices)
                          : Builder->CreateGEP(
                                OrigBase->getType()->getPointerElementType(),
                                OrigBase, NewIndices);
         NGEP->takeName(GEP);
 
         if (isa<BitCastInst>(CI))
@@ -1504,16 +1496,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
   // to be exposed to other transforms.
 
-  if (!DL)
-    return commonPointerCastTransforms(CI);
-
   Type *Ty = CI.getType();
   unsigned AS = CI.getPointerAddressSpace();
 
-  if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+  if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
     return commonPointerCastTransforms(CI);
 
-  Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+  Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
   if (Ty->isVectorTy()) // Handle vectors of pointers.
     PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
 
@@ -1597,8 +1586,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
 /// This returns false if the pattern can't be matched or true if it can,
 /// filling in Elements with the elements found here.
 static bool CollectInsertionElements(Value *V, unsigned Shift,
-                                     SmallVectorImpl<Value*> &Elements,
-                                     Type *VecEltTy, InstCombiner &IC) {
+                                     SmallVectorImpl<Value *> &Elements,
+                                     Type *VecEltTy, bool isBigEndian) {
   assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
         "Shift should be a multiple of the element type size");
@@ -1614,7 +1603,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
     return true;
 
   unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
-  if (IC.getDataLayout()->isBigEndian())
+  if (isBigEndian)
     ElementIndex = Elements.size() - ElementIndex - 1;
 
   // Fail if multiple elements are inserted into this slot.
@@ -1634,7 +1623,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
     // it to the right type so it gets properly inserted.
     if (NumElts == 1)
       return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
-                                      Shift, Elements, VecEltTy, IC);
+                                      Shift, Elements, VecEltTy, isBigEndian);
 
     // Okay, this is a constant that covers multiple elements.  Slice it up into
     // pieces and insert each element-sized piece into the vector.
@@ -1649,7 +1638,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
       Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
                                                                   ShiftI));
       Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
-      if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+      if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+                                    isBigEndian))
         return false;
     }
     return true;
@@ -1662,28 +1652,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
   switch (I->getOpcode()) {
   default: return false; // Unhandled case.
   case Instruction::BitCast:
-    return CollectInsertionElements(I->getOperand(0), Shift,
-                                    Elements, VecEltTy, IC);
+    return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+                                    isBigEndian);
   case Instruction::ZExt:
     if (!isMultipleOfTypeSize(
                           I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
                               VecEltTy))
       return false;
-    return CollectInsertionElements(I->getOperand(0), Shift,
-                                    Elements, VecEltTy, IC);
+    return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+                                    isBigEndian);
   case Instruction::Or:
-    return CollectInsertionElements(I->getOperand(0), Shift,
-                                    Elements, VecEltTy, IC) &&
-           CollectInsertionElements(I->getOperand(1), Shift,
-                                    Elements, VecEltTy, IC);
+    return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+                                    isBigEndian) &&
+           CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+                                    isBigEndian);
   case Instruction::Shl: {
     // Must be shifting by a constant that is a multiple of the element size.
     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
     if (!CI) return false;
     Shift += CI->getZExtValue();
     if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
-    return CollectInsertionElements(I->getOperand(0), Shift,
-                                    Elements, VecEltTy, IC);
+    return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+                                    isBigEndian);
   }
 
   }
@@ -1706,15 +1696,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
 /// Into two insertelements that do "buildvector{%inc, %inc5}".
 static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                 InstCombiner &IC) {
-  // We need to know the target byte order to perform this optimization.
-  if (!IC.getDataLayout()) return nullptr;
-
   VectorType *DestVecTy = cast<VectorType>(CI.getType());
   Value *IntInput = CI.getOperand(0);
 
   SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
   if (!CollectInsertionElements(IntInput, 0, Elements,
-                                DestVecTy->getElementType(), IC))
+                                DestVecTy->getElementType(),
+                                IC.getDataLayout().isBigEndian()))
     return nullptr;
 
   // If we succeeded, we know that all of the element are specified by Elements
@@ -1734,10 +1722,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
 
 /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
 /// bitcast.  The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
-  // We need to know the target byte order to perform this optimization.
-  if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+                                              const DataLayout &DL) {
   Value *Src = CI.getOperand(0);
   Type *DestTy = CI.getType();
 
@@ -1760,7 +1746,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
     }
 
     unsigned Elt = 0;
-    if (IC.getDataLayout()->isBigEndian())
+    if (DL.isBigEndian())
       Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
     return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
   }
@@ -1784,7 +1770,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
     }
 
     unsigned Elt = ShAmt->getZExtValue() / DestWidth;
-    if (IC.getDataLayout()->isBigEndian())
+    if (DL.isBigEndian())
       Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
     return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
   }
@@ -1839,7 +1825,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // Try to optimize int -> float bitcasts.
   if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
-    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
       return I;
 
   if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f48d89b..803b50a 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
 Instruction *InstCombiner::
 FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
                              CmpInst &ICI, ConstantInt *AndCst) {
-  // We need TD information to know the pointer size unless this is inbounds.
-  if (!GEP->isInBounds() && !DL)
-    return nullptr;
-
   Constant *Init = GV->getInitializer();
   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
     return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   // the array, this will fully represent all the comparison results.
   uint64_t MagicBitvector = 0;
 
-
   // Scan the array and see if one of our patterns matches.
   Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
   for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   // index down like the GEP would do implicitly.  We don't have to do this for
   // an inbounds GEP because the index can't be out of range.
   if (!GEP->isInBounds()) {
-    Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+    Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
     unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
     if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
       Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     // - Default to i32
     if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
       Ty = Idx->getType();
-    else if (DL)
-      Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
-    else if (ArrayElementCount <= 32)
-      Ty = Type::getInt32Ty(Init->getContext());
+    else
+      Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
 
     if (Ty) {
       Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
 ///
 /// If we can't emit an optimized form for this expression, this returns null.
 ///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
-  const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+                                          const DataLayout &DL) {
   gep_type_iterator GTI = gep_type_begin(GEP);
 
   // Check to see if this gep only has a single variable index.  If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     RHS = RHS->stripPointerCasts();
 
   Value *PtrBase = GEPLHS->getOperand(0);
-  if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+  if (PtrBase == RHS && GEPLHS->isInBounds()) {
     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
     // This transformation (ignoring the base and scales) is valid because we
    // know pointers can't overflow since the gep is inbounds.  See if we can
     // output an optimized form.
-    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
 
     // If not, synthesize the offset the hard way.
     if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     // If we're comparing GEPs with two base pointers that only differ in type
     // and both GEPs have only constant indices or just one use, then fold
     // the compare with the adjusted indices.
-    if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+    if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
         (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
         (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
         PtrBase->stripPointerCasts() ==
-          GEPRHS->getOperand(0)->stripPointerCasts()) {
+            GEPRHS->getOperand(0)->stripPointerCasts()) {
       Value *LOffset = EmitGEPOffset(GEPLHS);
       Value *ROffset = EmitGEPOffset(GEPRHS);
 
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
 
     // Only lower this if the icmp is the only user of the GEP or if we expect
     // the result to fold to a constant!
-    if (DL &&
-        GEPsInBounds &&
-        (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+    if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
         (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
       // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
       Value *L = EmitGEPOffset(GEPLHS);
@@ -1928,8 +1919,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
 
   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
   // integer type is the same size as the pointer type.
-  if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
-      DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+      DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
     Value *RHSOp = nullptr;
     if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
       Value *RHSCIOp = RHSC->getOperand(0);
@@ -2660,8 +2651,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   unsigned BitWidth = 0;
   if (Ty->isIntOrIntVectorTy())
     BitWidth = Ty->getScalarSizeInBits();
-  else if (DL)  // Pointers require DL info to get their size.
-    BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+  else // Get pointer size.
+    BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
 
   bool isSignBit = false;
 
@@ -2774,8 +2765,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
                              Op0KnownZero, Op0KnownOne, 0))
       return &I;
     if (SimplifyDemandedBits(I.getOperandUse(1),
-                             APInt::getAllOnesValue(BitWidth),
-                             Op1KnownZero, Op1KnownOne, 0))
+                             APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+                             Op1KnownOne, 0))
       return &I;
 
     // Given the known and unknown bits, compute a range that the LHS could be
@@ -3094,9 +3085,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     }
     case Instruction::IntToPtr:
       // icmp pred inttoptr(X), null -> icmp pred X, 0
-      if (RHSC->isNullValue() && DL &&
-          DL->getIntPtrType(RHSC->getType()) ==
-             LHSI->getOperand(0)->getType())
+      if (RHSC->isNullValue() &&
+          DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
         return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
                   Constant::getNullValue(LHSI->getOperand(0)->getType()));
       break;
@@ -3428,7 +3418,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     // if A is a power of 2.
     if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
         match(Op1, m_Zero()) &&
-        isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+        isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
       return new ICmpInst(I.getInversePredicate(),
                           Builder->CreateAnd(A, B),
                           Op1);
@@ -3563,6 +3553,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       }
     }
 
+    // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+    if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+        match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+      unsigned TypeBits = Cst1->getBitWidth();
+      unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+      if (ShAmt < TypeBits && ShAmt != 0) {
+        Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+        APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+        Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+                                        I.getName() + ".mask");
+        return new ICmpInst(I.getPredicate(), And,
+                            Constant::getNullValue(Cst1->getType()));
+      }
+    }
+
     // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
     // "icmp (and X, mask), cst"
     uint64_t ShAmt = 0;
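[Editor's note: the new fold just added above relies on the identity (A << C) == (B << C) iff ((A ^ B) & (~0U >> C)) == 0, i.e. equality after shifting left by C depends only on the low (width - C) bits. The following standalone C++ check, an editor's illustration rather than part of this patch, verifies it exhaustively for 8-bit values and the shift range 1..7 that the fold's guard (ShAmt != 0 && ShAmt < TypeBits) permits.]

#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned c = 1; c < 8; ++c)
    for (unsigned a = 0; a < 256; ++a)
      for (unsigned b = 0; b < 256; ++b) {
        // Left-hand side: compare the shifted values truncated to i8.
        bool lhs = (uint8_t)(a << c) == (uint8_t)(b << c);
        // Right-hand side: compare only the bits that survive the shift.
        bool rhs = ((a ^ b) & (0xFFu >> c)) == 0;
        if (lhs != rhs) {
          std::printf("mismatch: a=%u b=%u c=%u\n", a, b, c);
          return 1;
        }
      }
  std::puts("fold holds for all i8 values and shifts 1..7");
  return 0;
}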
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2fd5318..fb2321d 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -158,10 +158,10 @@ private:
   AssumptionCache *AC;
   TargetLibraryInfo *TLI;
   DominatorTree *DT;
+  const DataLayout &DL;
 
   // Optional analyses. When non-null, these can both be used to do better
   // combining and will be updated to reflect any changes.
-  const DataLayout *DL;
   LoopInfo *LI;
 
   bool MadeIRChange;
@@ -169,7 +169,7 @@ public:
   InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
                bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
-               DominatorTree *DT, const DataLayout *DL, LoopInfo *LI)
+               DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
 
@@ -180,7 +180,7 @@ public:
 
   AssumptionCache *getAssumptionCache() const { return AC; }
 
-  const DataLayout *getDataLayout() const { return DL; }
+  const DataLayout &getDataLayout() const { return DL; }
 
   DominatorTree *getDominatorTree() const { return DT; }
 
@@ -330,17 +330,17 @@ private:
                               Type *Ty);
 
   Instruction *visitCallSite(CallSite CS);
-  Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+  Instruction *tryOptimizeCall(CallInst *CI);
   bool transformConstExprCastCall(CallSite CS);
   Instruction *transformCallThroughTrampoline(CallSite CS,
                                               IntrinsicInst *Tramp);
   Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
                                  bool DoXform = true);
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
-  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
-  bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
-  bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
-  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+  bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+  bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -372,6 +372,10 @@ public:
   /// I to the worklist, replace all uses of I with the new value, then return
   /// I, so that the inst combiner will know that I was modified.
   Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+    // If there are no uses to replace, then we return nullptr to indicate that
+    // no changes were made to the program.
+    if (I.use_empty()) return nullptr;
+
     Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
 
     // If we are replacing the instruction with itself, this must be in a
@@ -423,7 +427,7 @@ public:
   }
 
   void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                        unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+                        unsigned Depth, Instruction *CxtI) const {
     return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
                                   DT);
   }
@@ -468,7 +472,7 @@ private:
   /// bits.
   Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
                                  APInt &KnownOne, unsigned Depth,
-                                 Instruction *CxtI = nullptr);
+                                 Instruction *CxtI);
   bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
                             APInt &KnownOne, unsigned Depth = 0);
   /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b9eb986..6b0f268 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -164,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
   return nullptr;
 }
 
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
-  // Ensure that the alloca array size argument has type intptr_t, so that
-  // any casting is exposed early.
-  if (DL) {
-    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
-    if (AI.getArraySize()->getType() != IntPtrTy) {
-      Value *V = Builder->CreateIntCast(AI.getArraySize(),
-                                        IntPtrTy, false);
-      AI.setOperand(0, V);
-      return &AI;
-    }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+  // Check for array size of 1 (scalar allocation).
+  if (!AI.isArrayAllocation()) {
+    // i32 1 is the canonical array size for scalar allocations.
+    if (AI.getArraySize()->getType()->isIntegerTy(32))
+      return nullptr;
+
+    // Canonicalize it.
+    Value *V = IC.Builder->getInt32(1);
+    AI.setOperand(0, V);
+    return &AI;
   }
 
   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
-  if (AI.isArrayAllocation()) {  // Check C != 1
-    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
-      Type *NewTy =
-        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
-      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
-      New->setAlignment(AI.getAlignment());
-
-      // Scan to the end of the allocation instructions, to skip over a block of
-      // allocas if possible...also skip interleaved debug info
-      //
-      BasicBlock::iterator It = New;
-      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
-      // Now that I is pointing to the first non-allocation-inst in the block,
-      // insert our getelementptr instruction...
-      //
-      Type *IdxTy = DL
-                  ? DL->getIntPtrType(AI.getType())
-                  : Type::getInt64Ty(AI.getContext());
-      Value *NullIdx = Constant::getNullValue(IdxTy);
-      Value *Idx[2] = { NullIdx, NullIdx };
-      Instruction *GEP =
+  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+    Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+    New->setAlignment(AI.getAlignment());
+
+    // Scan to the end of the allocation instructions, to skip over a block of
+    // allocas if possible...also skip interleaved debug info
+    //
+    BasicBlock::iterator It = New;
+    while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+      ++It;
+
+    // Now that I is pointing to the first non-allocation-inst in the block,
+    // insert our getelementptr instruction...
+    //
+    Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+    Value *NullIdx = Constant::getNullValue(IdxTy);
+    Value *Idx[2] = {NullIdx, NullIdx};
+    Instruction *GEP =
         GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
-      InsertNewInstBefore(GEP, *It);
+    IC.InsertNewInstBefore(GEP, *It);
 
-      // Now make everything use the getelementptr instead of the original
-      // allocation.
+    return IC.ReplaceInstUsesWith(AI, GEP);
   }
 
-  if (DL && AI.getAllocatedType()->isSized()) {
+  if (isa<UndefValue>(AI.getArraySize()))
+    return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+  // Ensure that the alloca array size argument has type intptr_t, so that
+  // any casting is exposed early.
+  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+  if (AI.getArraySize()->getType() != IntPtrTy) {
+    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+    AI.setOperand(0, V);
+    return &AI;
+  }
+
+  return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+  if (auto *I = simplifyAllocaArraySize(*this, AI))
+    return I;
+
+  if (AI.getAllocatedType()->isSized()) {
     // If the alignment is 0 (unspecified), assign it the preferred alignment.
     if (AI.getAlignment() == 0)
-      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
 
     // Move all alloca's of zero byte objects to the entry block and merge them
     // together.  Note that we only do this for alloca's, because malloc should
     // allocate and return a unique pointer, even for a zero byte allocation.
-    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
       // For a zero sized alloca there is no point in doing an array allocation.
       // This is helpful if the array size is a complicated expression not used
       // elsewhere.
@@ -237,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
       // dominance as the array size was forced to a constant earlier already.
       AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
       if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
-          DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+          DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
         AI.moveBefore(FirstInst);
         return &AI;
       }
@@ -246,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
       // assign it the preferred alignment.
       if (EntryAI->getAlignment() == 0)
         EntryAI->setAlignment(
-            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+            DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
       // Replace this zero-sized alloca with the one at the start of the entry
       // block after ensuring that the address will be aligned enough for both
       // types.
@@ -270,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   SmallVector<Instruction *, 4> ToDelete;
   if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
     unsigned SourceAlign = getOrEnforceKnownAlignment(
-        Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+        Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
     if (AI.getAlignment() <= SourceAlign) {
       DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
       DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
@@ -439,22 +452,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
     return nullptr;
 
   Type *Ty = LI.getType();
+  const DataLayout &DL = IC.getDataLayout();
 
   // Try to canonicalize loads which are only ever stored to operate over
   // integers instead of any other type. We only do this when the loaded type
   // is sized and has a size exactly the same as its store size and the store
   // size is a legal integer type.
-  const DataLayout *DL = IC.getDataLayout();
-  if (!Ty->isIntegerTy() && Ty->isSized() && DL &&
-      DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) &&
-      DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) {
+  if (!Ty->isIntegerTy() && Ty->isSized() &&
+      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
     if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
           auto *SI = dyn_cast<StoreInst>(U);
           return SI && SI->getPointerOperand() != &LI;
         })) {
       LoadInst *NewLoad = combineLoadToNewType(
           IC, LI,
-          Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty)));
+          Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
       // Replace all the stores with stores of the newly loaded value.
       for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
         auto *SI = cast<StoreInst>(*UI++);
@@ -489,7 +502,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
 //
 // FIXME: This should probably live in ValueTracking (or similar).
 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout *DL) {
+                                     const DataLayout &DL) {
   SmallPtrSet<Value *, 4> Visited;
   SmallVector<Value *, 4> Worklist(1, V);
 
@@ -529,7 +542,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
       if (!CS)
         return false;
 
-      uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+      uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
       // Make sure that, even if the multiplication below would wrap as an
       // uint64_t, we still do the right thing.
       if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
@@ -541,7 +554,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
       if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
         return false;
 
-      uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+      uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
       if (InitSize > MaxSize)
         return false;
       continue;
@@ -570,8 +583,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
 // offsets those indices implied.
 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
                                      Instruction *MemI, unsigned &Idx) {
-  const DataLayout *DL = IC.getDataLayout();
-  if (GEPI->getNumOperands() < 2 || !DL)
+  if (GEPI->getNumOperands() < 2)
     return false;
 
   // Find the first non-zero index of a GEP. If all indices are zero, return
@@ -603,7 +615,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
     GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
   if (!AllocTy || !AllocTy->isSized())
     return false;
-  uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+  const DataLayout &DL = IC.getDataLayout();
+  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
 
   // If there are more indices after the one we might replace with a zero, make
   // sure they're all non-negative. If any of them are negative, the overall
@@ -665,18 +678,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     return Res;
 
   // Attempt to improve the alignment.
-  if (DL) {
-    unsigned KnownAlign = getOrEnforceKnownAlignment(
-        Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
-    unsigned LoadAlign = LI.getAlignment();
-    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
-      DL->getABITypeAlignment(LI.getType());
-
-    if (KnownAlign > EffectiveLoadAlign)
-      LI.setAlignment(KnownAlign);
-    else if (LoadAlign == 0)
-      LI.setAlignment(EffectiveLoadAlign);
-  }
+  unsigned KnownAlign = getOrEnforceKnownAlignment(
+      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+  unsigned LoadAlign = LI.getAlignment();
+  unsigned EffectiveLoadAlign =
+      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+  if (KnownAlign > EffectiveLoadAlign)
+    LI.setAlignment(KnownAlign);
+  else if (LoadAlign == 0)
+    LI.setAlignment(EffectiveLoadAlign);
 
   // Replace GEP indices if possible.
   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
@@ -738,8 +749,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
     // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
     unsigned Align = LI.getAlignment();
-    if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
-        isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+    if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+        isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
       LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                          SI->getOperand(1)->getName()+".val");
       LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -807,6 +818,30 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
   return false;
 }
 
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+  // FIXME: We could probably with some care handle both volatile and atomic
+  // stores here but it isn't clear that this is important.
+  if (!SI.isSimple())
+    return false;
+
+  Value *V = SI.getValueOperand();
+  Type *T = V->getType();
+
+  if (!T->isAggregateType())
+    return false;
+
+  if (StructType *ST = dyn_cast<StructType>(T)) {
+    // If the struct only has one element, we unpack.
+    if (ST->getNumElements() == 1) {
+      V = IC.Builder->CreateExtractValue(V, 0);
+      combineStoreToNewValue(IC, SI, V);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// equivalentAddressValues - Test if A and B will obviously have the same
 /// value. This includes recognizing that %t0 and %t1 will have the same
 /// value in code like this:
@@ -845,18 +880,20 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     return EraseInstFromFunction(SI);
 
   // Attempt to improve the alignment.
-  if (DL) {
-    unsigned KnownAlign = getOrEnforceKnownAlignment(
-        Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
-    unsigned StoreAlign = SI.getAlignment();
-    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
-      DL->getABITypeAlignment(Val->getType());
-
-    if (KnownAlign > EffectiveStoreAlign)
-      SI.setAlignment(KnownAlign);
-    else if (StoreAlign == 0)
-      SI.setAlignment(EffectiveStoreAlign);
-  }
+  unsigned KnownAlign = getOrEnforceKnownAlignment(
+      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+  unsigned StoreAlign = SI.getAlignment();
+  unsigned EffectiveStoreAlign =
+      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+  if (KnownAlign > EffectiveStoreAlign)
+    SI.setAlignment(KnownAlign);
+  else if (StoreAlign == 0)
+    SI.setAlignment(EffectiveStoreAlign);
+
+  // Try to canonicalize the stored type.
+  if (unpackStoreToAggregate(*this, SI))
+    return EraseInstFromFunction(SI);
 
   // Replace GEP indices if possible.
   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
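
A sketch of what the new unpackStoreToAggregate hook above does, in the IR notation the patch's comments already use (the value names are hypothetical, and combineStoreToNewValue also adjusts the pointer operand to match the new value type):

    store { float } %agg, { float }* %p
    ; becomes
    %agg.elt = extractvalue { float } %agg, 0
    store float %agg.elt, float* %p
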
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c48e3c9..35513f1 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -26,7 +26,7 @@ using namespace PatternMatch;
 /// where it is known to be non-zero.  If this allows us to simplify the
 /// computation, do so and return the new operand, otherwise return null.
 static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
-                                        Instruction *CxtI) {
+                                        Instruction &CxtI) {
   // If V has multiple uses, then we would have to do more analysis to determine
   // if this is safe.  For example, the use could be in dynamically unreached
   // code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
   // inexact.  Similarly for <<.
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
     if (I->isLogicalShift() &&
-        isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
-                               IC.getAssumptionCache(), CxtI,
+        isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+                               IC.getAssumptionCache(), &CxtI,
                                IC.getDominatorTree())) {
       // We know that this is an exact/nuw shift and that the input is a
       // non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
 /// \brief Return true if we can prove that:
 ///    (mul LHS, RHS) === (mul nsw LHS, RHS)
 bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
-                                            Instruction *CxtI) {
+                                            Instruction &CxtI) {
   // Multiplying n * m significant bits yields a result of n + m significant
   // bits. If the total number of significant bits does not exceed the
   // result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
   // Note that underestimating the number of sign bits gives a more
   // conservative answer.
-  unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
-                      ComputeNumSignBits(RHS, 0, CxtI);
+  unsigned SignBits =
+      ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
 
   // First handle the easy case: if we have enough sign bits there's
   // definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
     // For simplicity we just check if at least one side is not negative.
     bool LHSNonNegative, LHSNegative;
     bool RHSNonNegative, RHSNegative;
-    ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
-    ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+    ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+    ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
     if (LHSNonNegative || RHSNonNegative)
       return true;
   }
@@ -375,7 +375,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     }
   }
 
-  if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+  if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
     Changed = true;
     I.setHasNoSignedWrap(true);
   }
@@ -422,7 +422,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
   if (C->getType()->isVectorTy()) {
     for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
          ++I) {
-      ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+      ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
       if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
         return false;
     }
@@ -437,7 +437,7 @@ static bool isNormalFp(Constant *C) {
   if (C->getType()->isVectorTy()) {
     for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
          ++I) {
-      ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+      ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
       if (!CFP || !CFP->getValueAPF().isNormal())
         return false;
     }
@@ -780,7 +780,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // The RHS is known non-zero.
-  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
     I.setOperand(1, V);
     return &I;
   }
@@ -1155,7 +1155,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       return BO;
   }
 
-  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+  if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
     // X sdiv (1 << Y)  ->  X udiv (1 << Y) ( -> X u>> Y)
     // Safe because the only negative value (1 << Y) can take on is
    // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1338,7 +1338,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // The RHS is known non-zero.
-  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
     I.setOperand(1, V);
     return &I;
   }
@@ -1385,7 +1385,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
                                   I.getType());
 
   // X urem Y -> X and Y-1, where Y is a power of 2,
-  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+  if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
     Value *Add = Builder->CreateAdd(Op1, N1);
     return BinaryOperator::CreateAnd(Op0, Add);
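
The visitURem change above keeps the "X urem Y -> X and Y-1" mask rewrite while passing DL explicitly; as a rough sketch with hypothetical values, where %y has been proven to be a power of two:

    %r = urem i32 %x, %y
    ; becomes
    %y.m1 = add i32 %y, -1
    %r = and i32 %x, %y.m1
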
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 0e73db8..ca2caed 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
 
   Value *Base = FixedOperands[0];
   GetElementPtrInst *NewGEP =
-      GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+      GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+                                makeArrayRef(FixedOperands).slice(1));
   if (AllInBounds) NewGEP->setIsInBounds();
   NewGEP->setDebugLoc(FirstInst->getDebugLoc());
   return NewGEP;
@@ -891,8 +891,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
   // it is only used by trunc or trunc(lshr) operations.  If so, we split the
   // PHI into the various pieces being extracted.  This sort of thing is
   // introduced when SROA promotes an aggregate to a single large integer type.
-  if (PN.getType()->isIntegerTy() && DL &&
-      !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+  if (PN.getType()->isIntegerTy() &&
+      !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
     if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
       return Res;
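
For context on the FoldPHIArgGEPIntoPHI hunk above (which now passes the source element type explicitly), the fold itself merges matching GEPs feeding a phi into a single GEP of a phi; a sketch with hypothetical names, in the older getelementptr syntax this patch's comments use elsewhere:

    %p1 = getelementptr i32* %base, i64 %i        ; in %bb1
    %p2 = getelementptr i32* %base, i64 %j        ; in %bb2
    %p  = phi i32* [ %p1, %bb1 ], [ %p2, %bb2 ]
    ; becomes
    %idx = phi i64 [ %i, %bb1 ], [ %j, %bb2 ]
    %p   = getelementptr i32* %base, i64 %idx
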
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index dd0e65f..b28611f 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -312,9 +312,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
 /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
 /// replaced with RepOp.
 static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
-                                     const DataLayout *TD,
                                      const TargetLibraryInfo *TLI,
-                                     DominatorTree *DT, AssumptionCache *AC) {
+                                     const DataLayout &DL, DominatorTree *DT,
+                                     AssumptionCache *AC) {
   // Trivial replacement.
   if (V == Op)
     return RepOp;
@@ -326,18 +326,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
   // If this is a binary operator, try to simplify it with the replaced op.
   if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
     if (B->getOperand(0) == Op)
-      return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+      return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
     if (B->getOperand(1) == Op)
-      return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+      return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
   }
 
   // Same for CmpInsts.
   if (CmpInst *C = dyn_cast<CmpInst>(I)) {
     if (C->getOperand(0) == Op)
-      return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+      return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
                              TLI, DT, AC);
     if (C->getOperand(1) == Op)
-      return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+      return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
                              TLI, DT, AC);
   }
 
@@ -361,14 +361,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
     if (ConstOps.size() == I->getNumOperands()) {
       if (CmpInst *C = dyn_cast<CmpInst>(I))
         return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
-                                               ConstOps[1], TD, TLI);
+                                               ConstOps[1], DL, TLI);
 
       if (LoadInst *LI = dyn_cast<LoadInst>(I))
         if (!LI->isVolatile())
-          return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+          return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
 
-      return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                      ConstOps, TD, TLI);
+      return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+                                      DL, TLI);
     }
   }
 
@@ -635,25 +635,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
   // arms of the select. See if substituting this value into the arm and
   // simplifying the result yields the same value as the other arm.
   if (Pred == ICmpInst::ICMP_EQ) {
-    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
             TrueVal ||
-        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
            TrueVal)
      return ReplaceInstUsesWith(SI, FalseVal);
-    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
            FalseVal ||
-        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
            FalseVal)
      return ReplaceInstUsesWith(SI, FalseVal);
   } else if (Pred == ICmpInst::ICMP_NE) {
-    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
            FalseVal ||
-        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
            FalseVal)
      return ReplaceInstUsesWith(SI, TrueVal);
-    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
            TrueVal ||
-        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
            TrueVal)
      return ReplaceInstUsesWith(SI, TrueVal);
   }
@@ -927,7 +927,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return BinaryOperator::CreateAnd(NotCond, FalseVal);
     }
     if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
-      if (C->getZExtValue() == false) {
+      if (!C->getZExtValue()) {
         // Change: A = select B, C, false --> A = and B, C
         return BinaryOperator::CreateAnd(CondVal, TrueVal);
       }
@@ -1203,37 +1203,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return NV;
 
   if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
-    // select(C, select(C, a, b), c) -> select(C, a, c)
-    if (TrueSI->getCondition() == CondVal) {
-      if (SI.getTrueValue() == TrueSI->getTrueValue())
-        return nullptr;
-      SI.setOperand(1, TrueSI->getTrueValue());
-      return &SI;
-    }
-    // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
-    // We choose this as normal form to enable folding on the And and shortening
-    // paths for the values (this helps GetUnderlyingObjects() for example).
-    if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
-      Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
-      SI.setOperand(0, And);
-      SI.setOperand(1, TrueSI->getTrueValue());
-      return &SI;
+    if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+      // select(C, select(C, a, b), c) -> select(C, a, c)
+      if (TrueSI->getCondition() == CondVal) {
+        if (SI.getTrueValue() == TrueSI->getTrueValue())
+          return nullptr;
+        SI.setOperand(1, TrueSI->getTrueValue());
+        return &SI;
+      }
+      // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+      // We choose this as normal form to enable folding on the And and shortening
+      // paths for the values (this helps GetUnderlyingObjects() for example).
+      if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+        Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+        SI.setOperand(0, And);
+        SI.setOperand(1, TrueSI->getTrueValue());
+        return &SI;
+      }
     }
   }
   if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
-    // select(C, a, select(C, b, c)) -> select(C, a, c)
-    if (FalseSI->getCondition() == CondVal) {
-      if (SI.getFalseValue() == FalseSI->getFalseValue())
-        return nullptr;
-      SI.setOperand(2, FalseSI->getFalseValue());
-      return &SI;
-    }
-    // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
-    if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
-      Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
-      SI.setOperand(0, Or);
-      SI.setOperand(2, FalseSI->getFalseValue());
-      return &SI;
+    if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+      // select(C, a, select(C, b, c)) -> select(C, a, c)
+      if (FalseSI->getCondition() == CondVal) {
+        if (SI.getFalseValue() == FalseSI->getFalseValue())
+          return nullptr;
+        SI.setOperand(2, FalseSI->getFalseValue());
+        return &SI;
+      }
+      // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+      if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+        Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+        SI.setOperand(0, Or);
+        SI.setOperand(2, FalseSI->getFalseValue());
+        return &SI;
+      }
     }
   }
 
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b4976e0..a414ec6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
 /// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
 /// this value inserts the new computation that produces the shifted value.
 static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
-                              InstCombiner &IC) {
+                              InstCombiner &IC, const DataLayout &DL) {
   // We can always evaluate constants shifted.
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       V = IC.Builder->CreateLShr(C, NumBits);
     // If we got a constantexpr back, try to simplify it with TD info.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
-      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
-                                         IC.getTargetLibraryInfo());
+      V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
     return V;
   }
 
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
   case Instruction::Or:
   case Instruction::Xor:
     // Bitwise operators can all be arbitrarily evaluated shifted.
-    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
-    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    I->setOperand(
+        0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+    I->setOperand(
+        1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
     return I;
 
   case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
   }
 
   case Instruction::Select:
-    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
-    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+    I->setOperand(
+        1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+    I->setOperand(
+        2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
     return I;
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
-                                              NumBits, isLeftShift, IC));
+      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+                                              isLeftShift, IC, DL));
     return PN;
   }
   }
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
     DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
               " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
 
-    return ReplaceInstUsesWith(I,
-                 GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+    return ReplaceInstUsesWith(
+        I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
   }
 
   // See if we can simplify any instructions used by the instruction whose sole
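
As an illustration of the GetShiftedValue plumbing above (a sketch only; the constants are hypothetical): once CanEvaluateShifted has proven the shift is free, the shift is folded into the expression rather than emitted, e.g.

    %t = select i1 %c, i32 6, i32 10
    %s = shl i32 %t, 2
    ; becomes, by shifting both select arms at compile time
    %s = select i1 %c, i32 24, i32 40
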
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c5603aa..cd391d0 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
 
@@ -70,8 +69,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
 
-  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
-                                     KnownZero, KnownOne, 0, &Inst);
+  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+                                     0, &Inst);
   if (!V) return false;
   if (V == &Inst) return true;
   ReplaceInstUsesWith(Inst, V);
@@ -84,9 +83,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
 bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
                                         APInt &KnownZero, APInt &KnownOne,
                                         unsigned Depth) {
-  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
-                                          KnownZero, KnownOne, Depth,
-                                          dyn_cast<Instruction>(U.getUser()));
+  Value *NewVal =
+      SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
+                              dyn_cast<Instruction>(U.getUser()));
   if (!NewVal) return false;
   U = NewVal;
   return true;
@@ -122,15 +121,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   assert(Depth <= 6 && "Limit Search Depth");
   uint32_t BitWidth = DemandedMask.getBitWidth();
   Type *VTy = V->getType();
-  assert((DL || !VTy->isPointerTy()) &&
-         "SimplifyDemandedBits needs to know bit widths!");
-  assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
-         (!VTy->isIntOrIntVectorTy() ||
-          VTy->getScalarSizeInBits() == BitWidth) &&
-         KnownZero.getBitWidth() == BitWidth &&
-         KnownOne.getBitWidth() == BitWidth &&
-         "Value *V, DemandedMask, KnownZero and KnownOne "
-         "must have same BitWidth");
+  assert(
+      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+      KnownZero.getBitWidth() == BitWidth &&
+      KnownOne.getBitWidth() == BitWidth &&
+      "Value *V, DemandedMask, KnownZero and KnownOne "
+      "must have same BitWidth");
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     // We know all of the bits for a constant!
     KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +170,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // this instruction has a simpler value in that context.
     if (I->getOpcode() == Instruction::And) {
       // If either the LHS or the RHS are Zero, the result is zero.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                        CxtI);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                        CxtI);
 
       // If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +194,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // only bits from X or Y are demanded.
 
       // If either the LHS or the RHS are One, the result is One.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                        CxtI);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                        CxtI);
 
       // If all of the demanded bits are known zero on one side, return the
@@ -225,9 +221,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 
       // We can simplify (X^Y) -> X or Y in the user's context if we know that
      // only bits from X or Y are demanded.
-      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
                        CxtI);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                        CxtI);
 
       // If all of the demanded bits are known zero on one side, return the
@@ -256,10 +252,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::And:
     // If either the LHS or the RHS are Zero, the result is zero.
-    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
-                             RHSKnownZero, RHSKnownOne, Depth+1) ||
+    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+                             RHSKnownOne, Depth + 1) ||
         SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+                             LHSKnownZero, LHSKnownOne, Depth + 1))
       return I;
     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +290,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Or:
     // If either the LHS or the RHS are One, the result is One.
-    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
-                             RHSKnownZero, RHSKnownOne, Depth+1) ||
+    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+                             RHSKnownOne, Depth + 1) ||
        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+                             LHSKnownZero, LHSKnownOne, Depth + 1))
      return I;
    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -336,10 +332,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     KnownOne = RHSKnownOne | LHSKnownOne;
     break;
   case Instruction::Xor: {
-    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
-                             RHSKnownZero, RHSKnownOne, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+                             RHSKnownOne, Depth + 1) ||
+        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+                             LHSKnownOne, Depth + 1))
       return I;
     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +419,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   }
   case Instruction::Select:
-    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
-                             RHSKnownZero, RHSKnownOne, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+                             RHSKnownOne, Depth + 1) ||
+        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+                             LHSKnownOne, Depth + 1))
       return I;
     assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
     assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +441,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     DemandedMask = DemandedMask.zext(truncBf);
     KnownZero = KnownZero.zext(truncBf);
     KnownOne = KnownOne.zext(truncBf);
-    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
-                             KnownZero, KnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+                             KnownOne, Depth + 1))
      return I;
    DemandedMask = DemandedMask.trunc(BitWidth);
    KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // Don't touch a vector-to-scalar bitcast.
       return nullptr;
 
-    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
-                             KnownZero, KnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+                             KnownOne, Depth + 1))
       return I;
     assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
     break;
@@ -483,8 +479,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     DemandedMask = DemandedMask.trunc(SrcBitWidth);
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
-    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
-                             KnownZero, KnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+                             KnownOne, Depth + 1))
       return I;
     DemandedMask = DemandedMask.zext(BitWidth);
     KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +506,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
-    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
-                             KnownZero, KnownOne, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+                             KnownOne, Depth + 1))
       return I;
     InputDemandedBits = InputDemandedBits.zext(BitWidth);
     KnownZero = KnownZero.zext(BitWidth);
@@ -552,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 
     // Find information about known zero/one bits in the input.
     if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+                             LHSKnownZero, LHSKnownOne, Depth + 1))
       return I;
 
     // If the RHS of the add has bits set that can't affect the input, reduce
@@ -602,9 +598,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // significant bit and all those below it.
       APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
       if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
-                               LHSKnownZero, LHSKnownOne, Depth+1) ||
+                               LHSKnownZero, LHSKnownOne, Depth + 1) ||
           SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
-                               LHSKnownZero, LHSKnownOne, Depth+1))
+                               LHSKnownZero, LHSKnownOne, Depth + 1))
         return I;
     }
   }
@@ -619,9 +615,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     uint32_t NLZ = DemandedMask.countLeadingZeros();
     APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
-                             LHSKnownZero, LHSKnownOne, Depth+1) ||
+                             LHSKnownZero, LHSKnownOne, Depth + 1) ||
         SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
-                             LHSKnownZero, LHSKnownOne, Depth+1))
+                             LHSKnownZero, LHSKnownOne, Depth + 1))
       return I;
   }
 
@@ -662,8 +658,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       else if (IOp->hasNoUnsignedWrap())
         DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
 
-      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
-                               KnownZero, KnownOne, Depth+1))
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+                               KnownOne, Depth + 1))
         return I;
       assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
       KnownZero <<= ShiftAmt;
@@ -686,8 +682,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       if (cast<LShrOperator>(I)->isExact())
         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
 
-      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
-                               KnownZero, KnownOne, Depth+1))
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+                               KnownOne, Depth + 1))
        return I;
      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +727,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       if (cast<AShrOperator>(I)->isExact())
         DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
 
-      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
-                               KnownZero, KnownOne, Depth+1))
+      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+                               KnownOne, Depth + 1))
        return I;
      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now.
@@ -772,8 +768,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       APInt LowBits = RA - 1;
       APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
-      if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
-                               LHSKnownZero, LHSKnownOne, Depth+1))
+      if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+                               LHSKnownOne, Depth + 1))
         return I;
 
       // The low bits of LHS are unchanged by the srem.
@@ -798,7 +794,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // remainder is zero.
     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
                        CxtI);
       // If it's known zero, our sign bit is also zero.
       if (LHSKnownZero.isNegative())
@@ -808,10 +804,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   case Instruction::URem: {
     APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
     APInt AllOnes = APInt::getAllOnesValue(BitWidth);
-    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
-                             KnownZero2, KnownOne2, Depth+1) ||
-        SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
-                             KnownZero2, KnownOne2, Depth+1))
+    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+                             KnownOne2, Depth + 1) ||
+        SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+                             KnownOne2, Depth + 1))
       return I;
 
     unsigned Leaders = KnownZero2.countLeadingOnes();
@@ -1051,7 +1047,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       // Note that we can't propagate undef elt info, because we don't know
       // which elt is getting updated.
       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
-                                        UndefElts2, Depth+1);
+                                        UndefElts2, Depth + 1);
       if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
       break;
     }
@@ -1069,7 +1065,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     APInt DemandedElts2 = DemandedElts;
     DemandedElts2.clearBit(IdxNo);
     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
-                                      UndefElts, Depth+1);
+                                      UndefElts, Depth + 1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
 
     // The inserted element is defined.
@@ -1097,12 +1093,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     APInt UndefElts4(LHSVWidth, 0);
     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
-                                      UndefElts4, Depth+1);
+                                      UndefElts4, Depth + 1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
 
     APInt UndefElts3(LHSVWidth, 0);
     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
-                                      UndefElts3, Depth+1);
+                                      UndefElts3, Depth + 1);
     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
 
     bool NewUndefElts = false;
@@ -1152,12 +1148,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       }
     }
 
-    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
-                                      UndefElts, Depth+1);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+                                      Depth + 1);
     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
 
     TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
-                                      UndefElts2, Depth+1);
+                                      UndefElts2, Depth + 1);
     if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
 
     // Output elements are undefined if both are undefined.
@@ -1204,7 +1200,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 
     // div/rem demand all inputs, because they don't want divide by zero.
     TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
-                                      UndefElts2, Depth+1);
+                                      UndefElts2, Depth + 1);
     if (TmpV) {
       I->setOperand(0, TmpV);
       MadeChange = true;
@@ -1238,11 +1234,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   case Instruction::Sub:
   case Instruction::Mul:
     // div/rem demand all inputs, because they don't want divide by zero.
-    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
-                                      UndefElts, Depth+1);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+                                      Depth + 1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
 
     TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
-                                      UndefElts2, Depth+1);
+                                      UndefElts2, Depth + 1);
     if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
 
     // Output elements are undefined if both are undefined.  Consider things
     break;
   case Instruction::FPTrunc:
   case Instruction::FPExt:
-    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
-                                      UndefElts, Depth+1);
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+                                      Depth + 1);
     if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
     break;
 
@@ -1273,10 +1269,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     case Intrinsic::x86_sse2_min_sd:
     case Intrinsic::x86_sse2_max_sd:
       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
-                                        UndefElts, Depth+1);
+                                        UndefElts, Depth + 1);
       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
-                                        UndefElts2, Depth+1);
+                                        UndefElts2, Depth + 1);
       if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
 
       // If only the low elt is demanded and this is a scalarizable intrinsic,
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e07efb5..b6beb65 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -202,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       APInt UndefElts(VectorWidth, 0);
       APInt DemandedMask(VectorWidth, 0);
       DemandedMask.setBit(IndexVal);
-      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
-                                                DemandedMask, UndefElts)) {
+      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+                                                UndefElts)) {
         EI.setOperand(0, V);
         return &EI;
       }
@@ -733,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
     case Instruction::GetElementPtr: {
       Value *Ptr = NewOps[0];
       ArrayRef<Value*> Idx = NewOps.slice(1);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
       GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
       return GEP;
     }
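
A note on the SimplifyDemandedVectorElts call in visitExtractElementInst above: only the extracted lane is marked demanded, so the operand may be simplified lane-wise. Sketch with hypothetical values:

    %e = extractelement <4 x float> %v, i32 2
    ; only lane 2 of %v is demanded; the instruction defining %v may have
    ; its other lanes relaxed to undef by the demanded-elements analysis
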
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 88fcd53..90551e4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,6 +57,7 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
@@ -75,7 +76,7 @@ STATISTIC(NumFactor   , "Number of factorizations");
 STATISTIC(NumReassoc  , "Number of reassociations");
 
 Value *InstCombiner::EmitGEPOffset(User *GEP) {
-  return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+  return llvm::EmitGEPOffset(Builder, DL, GEP);
 }
 
 /// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -84,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
 bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
   assert(From->isIntegerTy() && To->isIntegerTy());
 
-  // If we don't have DL, we don't know if the source/dest are legal.
-  if (!DL) return false;
-
   unsigned FromWidth = From->getPrimitiveSizeInBits();
   unsigned ToWidth = To->getPrimitiveSizeInBits();
-  bool FromLegal = DL->isLegalInteger(FromWidth);
-  bool ToLegal = DL->isLegalInteger(ToWidth);
+  bool FromLegal = DL.isLegalInteger(FromWidth);
+  bool ToLegal = DL.isLegalInteger(ToWidth);
 
   // If this is a legal integer from type, and the result would be an illegal
   // type, don't do the transformation.
@@ -445,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
 /// This tries to simplify binary operations by factorizing out common terms
 /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
 static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
-                               const DataLayout *DL, BinaryOperator &I,
+                               const DataLayout &DL, BinaryOperator &I,
                                Instruction::BinaryOps InnerOpcode, Value *A,
                                Value *B, Value *C, Value *D) {
 
@@ -872,12 +870,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
 /// will land us at the specified offset.  If so, fill them into NewIndices and
 /// return the resultant element type, otherwise return null.
 Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
-                                        SmallVectorImpl<Value*> &NewIndices) {
+                                        SmallVectorImpl<Value *> &NewIndices) {
   assert(PtrTy->isPtrOrPtrVectorTy());
 
-  if (!DL)
-    return nullptr;
-
   Type *Ty = PtrTy->getPointerElementType();
   if (!Ty->isSized())
     return nullptr;
@@ -885,9 +880,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
   // is something like [0 x {int, int}]
-  Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+  Type *IntPtrTy = DL.getIntPtrType(PtrTy);
   int64_t FirstIdx = 0;
-  if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+  if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
     FirstIdx = Offset/TySize;
     Offset -= FirstIdx*TySize;
 
@@ -905,11 +900,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
   // Index into the types.  If we fail, set OrigBase to null.
   while (Offset) {
     // Indexing into tail padding between struct/array elements.
-    if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+    if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
       return nullptr;
 
     if (StructType *STy = dyn_cast<StructType>(Ty)) {
-      const StructLayout *SL = DL->getStructLayout(STy);
+      const StructLayout *SL = DL.getStructLayout(STy);
       assert(Offset < (int64_t)SL->getSizeInBytes() &&
              "Offset must stay within the indexed type");
 
@@ -920,7 +915,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
       Offset -= SL->getElementOffset(Elt);
       Ty = STy->getElementType(Elt);
     } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-      uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+      uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
       NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
       Offset %= EltSize;
@@ -1214,7 +1209,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
-  if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+  if (!isSafeToSpeculativelyExecute(&Inst))
+    return nullptr;
 
   unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
   Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1300,37 +1296,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
   // Eliminate unneeded casts for indices, and replace indices which displace
   // by multiples of a zero size type with zero.
-  if (DL) {
-    bool MadeChange = false;
-    Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
-    gep_type_iterator GTI = gep_type_begin(GEP);
-    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
-         I != E; ++I, ++GTI) {
-      // Skip indices into struct types.
-      SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
-      if (!SeqTy) continue;
-
-      // If the element type has zero size then any index over it is equivalent
-      // to an index of zero, so replace it with zero if it is not zero already.
-      if (SeqTy->getElementType()->isSized() &&
-          DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
-        if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
-          *I = Constant::getNullValue(IntPtrTy);
-          MadeChange = true;
-        }
+  bool MadeChange = false;
+  Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+       ++I, ++GTI) {
+    // Skip indices into struct types.
+    SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+    if (!SeqTy)
+      continue;
 
-      Type *IndexTy = (*I)->getType();
-      if (IndexTy != IntPtrTy) {
-        // If we are using a wider index than needed for this platform, shrink
-        // it to what we need.  If narrower, sign-extend it to what we need.
-        // This explicit cast can make subsequent optimizations more obvious.
-        *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+    // If the element type has zero size then any index over it is equivalent
+    // to an index of zero, so replace it with zero if it is not zero already.
+    if (SeqTy->getElementType()->isSized() &&
+        DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+      if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+        *I = Constant::getNullValue(IntPtrTy);
         MadeChange = true;
       }
+
+    Type *IndexTy = (*I)->getType();
+    if (IndexTy != IntPtrTy) {
+      // If we are using a wider index than needed for this platform, shrink
+      // it to what we need.  If narrower, sign-extend it to what we need.
+      // This explicit cast can make subsequent optimizations more obvious.
+      *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+      MadeChange = true;
     }
-    if (MadeChange) return &GEP;
   }
+  if (MadeChange)
+    return &GEP;
 
   // Check to see if the inputs to the PHI node are getelementptr instructions.
   if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1338,6 +1334,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (!Op1)
       return nullptr;
 
+    // Don't fold a GEP into itself through a PHI node. This can only happen
+    // through the back-edge of a loop. Folding a GEP into itself means that
+    // the value of the previous iteration needs to be stored in the meantime,
+    // thus requiring an additional register variable to be live, but not
+    // actually achieving anything (the GEP still needs to be executed once per
+    // loop iteration).
+    if (Op1 == &GEP)
+      return nullptr;
+
     signed DI = -1;
 
     for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
@@ -1345,6 +1350,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
         return nullptr;
 
+      // As for Op1 above, don't try to fold a GEP into itself.
+      if (Op2 == &GEP)
+        return nullptr;
+
       // Keep track of the type as we walk the GEP.
       Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
 
@@ -1481,19 +1490,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
 
     if (!Indices.empty())
-      return (GEP.isInBounds() && Src->isInBounds()) ?
-        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
-                                          GEP.getName()) :
-        GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+      return GEP.isInBounds() && Src->isInBounds()
+                 ? GetElementPtrInst::CreateInBounds(
+                       Src->getSourceElementType(), Src->getOperand(0), Indices,
+                       GEP.getName())
+                 : GetElementPtrInst::Create(Src->getSourceElementType(),
+                                             Src->getOperand(0), Indices,
+                                             GEP.getName());
   }
 
-  if (DL && GEP.getNumIndices() == 1) {
+  if (GEP.getNumIndices() == 1) {
     unsigned AS = GEP.getPointerAddressSpace();
     if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
-        DL->getPointerSizeInBits(AS)) {
+        DL.getPointerSizeInBits(AS)) {
       Type *PtrTy = GEP.getPointerOperandType();
       Type *Ty = PtrTy->getPointerElementType();
-      uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+      uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
 
       bool Matched = false;
       uint64_t C;
@@ -1562,8 +1574,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
         // -> GEP i8* X, ...
         SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
-        GetElementPtrInst *Res =
-          GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+        GetElementPtrInst *Res = GetElementPtrInst::Create(
+            StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
         Res->setIsInBounds(GEP.isInBounds());
         if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
           return Res;
@@ -1599,9 +1611,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           // %0 = GEP [10 x i8] addrspace(1)* X, ...
           // addrspacecast i8 addrspace(1)* %0 to i8*
           SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
-          Value *NewGEP = GEP.isInBounds() ?
-            Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
-            Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+          Value *NewGEP =
+              GEP.isInBounds()
+                  ? Builder->CreateInBoundsGEP(StrippedPtr, Idx,
+                                               GEP.getName())
+                  : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+                                       StrippedPtr, Idx, GEP.getName());
           return new AddrSpaceCastInst(NewGEP, GEP.getType());
         }
       }
@@ -1612,14 +1627,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
      Type *SrcElTy = StrippedPtrTy->getElementType();
      Type *ResElTy = PtrOp->getType()->getPointerElementType();
-      if (DL && SrcElTy->isArrayTy() &&
-          DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
-          DL->getTypeAllocSize(ResElTy)) {
-        Type *IdxType = DL->getIntPtrType(GEP.getType());
+      if (SrcElTy->isArrayTy() &&
+          DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+              DL.getTypeAllocSize(ResElTy)) {
+        Type *IdxType = DL.getIntPtrType(GEP.getType());
         Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
-        Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() + ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1630,11 +1647,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %V = mul i64 %N, 4 // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized()) { + if (ResElTy->isSized() && SrcElTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1642,7 +1659,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1650,9 +1667,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, NewIdx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1665,13 +1685,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized() && - SrcElTy->isArrayTy()) { + if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize - = DL->getTypeAllocSize(SrcElTy->getArrayElementType()); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t ArrayEltSize = + DL.getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1679,7 +1698,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. 
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1688,13 +1707,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *Off[2] = { - Constant::getNullValue(DL->getIntPtrType(GEP.getType())), - NewIdx - }; + Constant::getNullValue(DL.getIntPtrType(GEP.getType())), + NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); + Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1704,9 +1722,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - if (!DL) - return nullptr; - // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. @@ -1727,10 +1742,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); - unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType()); + unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); APInt Offset(OffsetBits, 0); if (!isa<BitCastInst>(Operand) && - GEP.accumulateConstantOffset(*DL, Offset)) { + GEP.accumulateConstantOffset(DL, Offset)) { // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1761,7 +1776,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(Operand, NewIndices); + Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2012,6 +2027,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } + // If the condition is irrelevant, remove the use so that other + // transforms on the condition become more effective. + if (BI.isConditional() && + BI.getSuccessor(0) == BI.getSuccessor(1) && + !isa<UndefValue>(BI.getCondition())) { + BI.setCondition(UndefValue::get(BI.getCondition()->getType())); + return &BI; + } + // Canonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), @@ -2051,7 +2075,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { Value *Cond = SI.getCondition(); unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); @@ -2070,8 +2094,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // x86 generates redundant zero-extension instructions if the operand is 
truncated to i8 or i16. 
bool TruncCond = false; - if (DL && BitWidth > NewWidth && - NewWidth >= DL->getLargestLegalIntTypeSize()) { + if (NewWidth > 0 && BitWidth > NewWidth && + NewWidth >= DL.getLargestLegalIntTypeSize()) { TruncCond = true; IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder->SetInsertPoint(&SI); @@ -2632,7 +2656,7 @@ bool InstCombiner::run() { } // Instruction isn't dead, see if we can constant propagate it. - if (!I->use_empty() && isa<Constant>(I->getOperand(0))) + if (!I->use_empty() && isa<Constant>(I->getOperand(0))) { if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) { DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); @@ -2643,6 +2667,7 @@ bool InstCombiner::run() { MadeIRChange = true; continue; } + } // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { @@ -2756,10 +2781,9 @@ bool InstCombiner::run() { /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. /// -static bool AddReachableCodeToWorklist(BasicBlock *BB, - SmallPtrSetImpl<BasicBlock*> &Visited, +static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, + SmallPtrSetImpl<BasicBlock *> &Visited, InstCombineWorklist &ICWorklist, - const DataLayout *DL, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; @@ -2797,23 +2821,22 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, continue; } - if (DL) { - // See if we can constant fold its operands. - for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); - i != e; ++i) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == nullptr) continue; + // See if we can constant fold its operands. + for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; + ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(i); + if (CE == nullptr) + continue; - Constant*& FoldRes = FoldedConstants[CE]; - if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); - if (!FoldRes) - FoldRes = CE; + Constant *&FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); + if (!FoldRes) + FoldRes = CE; - if (FoldRes != CE) { - *i = FoldRes; - MadeIRChange = true; - } + if (FoldRes != CE) { + *i = FoldRes; + MadeIRChange = true; } } @@ -2867,7 +2890,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, /// /// This also does basic constant propagation and other forward fixing to make /// the combiner itself run much faster. -static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, +static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, InstCombineWorklist &ICWorklist) { bool MadeIRChange = false; @@ -2877,7 +2900,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, // track of which blocks we visit. SmallPtrSet<BasicBlock *, 64> Visited; MadeIRChange |= - AddReachableCodeToWorklist(F.begin(), Visited, ICWorklist, DL, TLI); + AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. 
This prevents @@ -2910,12 +2933,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, return MadeIRChange; } -static bool combineInstructionsOverFunction( - Function &F, InstCombineWorklist &Worklist, AssumptionCache &AC, - TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout *DL = nullptr, - LoopInfo *LI = nullptr) { +static bool +combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, + AssumptionCache &AC, TargetLibraryInfo &TLI, + DominatorTree &DT, LoopInfo *LI = nullptr) { // Minimizing size? bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); + auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. @@ -2950,15 +2974,13 @@ static bool combineInstructionsOverFunction( PreservedAnalyses InstCombinePass::run(Function &F, AnalysisManager<Function> *AM) { - auto *DL = F.getParent()->getDataLayout(); - auto &AC = AM->getResult<AssumptionAnalysis>(F); auto &DT = AM->getResult<DominatorTreeAnalysis>(F); auto &TLI = AM->getResult<TargetLibraryAnalysis>(F); auto *LI = AM->getCachedResult<LoopAnalysis>(F); - if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI)) + if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3007,12 +3029,10 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Optional analyses. - auto *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - auto *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); + return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); } char InstructionCombiningPass::ID = 0;
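
The hunks above bundle several distinct transforms. The sketches that follow illustrate them in the pre-3.7 typed-pointer IR that this file's own comments use; all value names are hypothetical and the snippets are editorial illustrations, not part of the commit. First, the two GEP index canonicalizations at the top of visitGetElementPtrInst: an index that steps over a zero-size element type is replaced with zero, and an index narrower than the target's pointer width is sign-extended (assuming 64-bit pointers here):

  ; Index over a zero-size element type ([0 x i32]) is replaced by 0:
  %g1 = getelementptr [0 x i32]* %p, i64 %i, i64 %j
    ; ==> %g1 = getelementptr [0 x i32]* %p, i64 0, i64 %j

  ; A narrow index is sign-extended to the pointer width:
  %g2 = getelementptr i32* %q, i16 %k
    ; ==> %k.ext = sext i16 %k to i64
    ;     %g2    = getelementptr i32* %q, i64 %k.ext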
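
The new Op1 == &GEP and Op2 == &GEP bail-outs stop the PHI-of-GEPs fold from merging a GEP into a PHI node that the GEP itself feeds, which can only happen around a loop back-edge. A minimal hypothetical trigger:

  loop:
    %ptr = phi i8* [ %base, %entry ], [ %gep, %loop ]
    %gep = getelementptr inbounds i8* %ptr, i64 1
    ; ... loop body computing %done ...
    br i1 %done, label %exit, label %loop

Folding %gep into %ptr would keep the previous iteration's pointer live across the back-edge while still executing one GEP per iteration, so nothing is gained.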
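
When a GEP's pointer operand is itself a GEP, the combiner tries to merge the two index lists into a single instruction (now created with Src->getSourceElementType()); roughly, and under conditions the code checks in detail:

  %t = getelementptr i32* %p, i64 %i
  %u = getelementptr i32* %t, i64 %j
    ; ==> %s = add i64 %i, %j
    ;     %u = getelementptr i32* %p, i64 %s

with inbounds preserved only when both of the original GEPs carry it.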
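
The byte-GEP-of-bitcast transforms follow the pattern the comments describe: when the index is a provable multiple of the real element size, the multiply is folded away and the GEP is rebuilt on the original pointee type (again assuming 64-bit pointers):

  %V = mul i64 %N, 4
  %t = getelementptr i8* bitcast (i32* %arr to i8*), i64 %V
    ; ==> %t1 = getelementptr i32* %arr, i64 %N
    ;     %t  = bitcast i32* %t1 to i8*

The rebuilt GEP keeps inbounds only when the decomposition proves that NewIdx * Scale cannot overflow (the NSW flag in the code).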
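
For a GEP of a bitcast with a constant accumulated offset, FindElementAtOffset tries to express the same address as a natural GEP on the original type; a zero total offset degenerates to a plain bitcast of the input. A hypothetical instance:

  %b = bitcast [4 x i32]* %a to i8*
  %g = getelementptr i8* %b, i64 8
    ; ==> %n = getelementptr [4 x i32]* %a, i64 0, i64 2
    ;     %g = bitcast i32* %n to i8*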
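
The new visitBranchInst rule clears the condition of a conditional branch whose two successors are identical, so the condition loses a use and its other combines (and eventual dead-code elimination) can fire:

  br i1 %cond, label %bb, label %bb
    ; ==> br i1 undef, label %bb, label %bb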
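
In visitSwitchInst, the known-bits query now passes the switch itself as context, and truncation additionally requires the narrowed width to still be a legal integer type for the target. A sketch assuming a target whose largest legal integer is 32 bits and a condition whose top 32 bits are known zero:

  switch i64 %x, label %def [
    i64 1, label %a
    i64 2, label %b
  ]
    ; ==> %x.tr = trunc i64 %x to i32
    ;     switch i32 %x.tr, label %def [
    ;       i32 1, label %a
    ;       i32 2, label %b
    ;     ]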