Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp            |  88
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp          |  18
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp             | 142
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp             | 148
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp          |  63
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h            |  24
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp   | 203
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp         |  30
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp               |   8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp            |  98
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp            |  25
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  | 148
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp         |   7
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp         | 258
14 files changed, 696 insertions(+), 564 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 752f79d..c608f84 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1243,7 +1245,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1258,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1310,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1374,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1387,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1440,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1662,26 +1664,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (DL) {
- Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
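The WillNotOverflowSigned{Add,Sub} helpers updated above rest on two's-complement facts: adding operands with opposite signs, or subtracting operands with identical signs, can never overflow, and an operand with more than one known sign bit occupies at most half the value range. A minimal standalone sketch of those arithmetic facts, in plain C++ on int32_t rather than the InstCombine APIs:

#include <cassert>
#include <cstdint>

// Adding two values with opposite signs cannot overflow: the exact sum lies
// between the two operands, so it stays within int32_t.
bool addCannotOverflowSigned(int32_t LHS, int32_t RHS) {
  return (LHS < 0) != (RHS < 0);
}

// Subtracting two values with identical signs cannot overflow: the exact
// difference has magnitude at most INT32_MAX.
bool subCannotOverflowSigned(int32_t LHS, int32_t RHS) {
  return (LHS < 0) == (RHS < 0);
}

int main() {
  assert(addCannotOverflowSigned(INT32_MAX, -1)); // opposite signs
  assert(subCannotOverflowSigned(INT32_MIN, -1)); // identical signs
}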
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 863eeaf..ee21c81 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -979,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
ConstantRange LHSRange =
- ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
ConstantRange RHSRange =
- ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
@@ -1709,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
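The FoldAndOfICmps change above only renames makeICmpRegion to makeAllowedICmpRegion; the underlying idea is that each icmp against a constant allows a range of values, and if the two ranges do not intersect, the AND of the compares is always false. A toy half-open-interval model of that check (not LLVM's ConstantRange class, which also handles wrapped ranges):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Values v with Lo <= v < Hi.
struct Range { int64_t Lo, Hi; };

bool intersectionIsEmpty(Range L, Range R) {
  return std::max(L.Lo, R.Lo) >= std::min(L.Hi, R.Hi);
}

int main() {
  // (x slt 5) && (x sgt 10) on i32: the allowed ranges are [INT32_MIN, 5)
  // and [11, INT32_MAX + 1). They are disjoint, so the AND folds to false.
  Range LHS{INT32_MIN, 5};
  Range RHS{11, int64_t(INT32_MAX) + 1};
  assert(intersectionIsEmpty(LHS, RHS));
}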
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05e7162..21243c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
@@ -61,8 +60,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -108,7 +107,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -156,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -198,6 +197,71 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
@@ -386,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// can prove that it will never overflow.
if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
+ if (WillNotOverflowSignedAdd(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
}
}
@@ -407,11 +471,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+ if (WillNotOverflowSignedSub(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
}
} else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
}
}
@@ -452,7 +516,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+ if (WillNotOverflowSignedMul(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
}
}
@@ -544,7 +608,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -561,7 +625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -578,7 +642,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -587,7 +651,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvlfd:
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
32) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -596,7 +660,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfs:
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -606,7 +670,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfd:
// Turn PPC QPX qvstfd -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
32) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -618,7 +682,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -735,9 +799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
@@ -906,6 +969,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -945,12 +1016,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -979,7 +1050,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1118,7 +1189,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1192,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1217,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1226,7 +1297,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
@@ -1391,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1408,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1487,7 +1558,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1512,12 +1586,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
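The new SimplifyX86vperm2 helper above turns a constant-immediate vperm2*128 into a generic shufflevector. Below is a standalone sketch of the control-byte decoding it documents, specialized to a 4-element vector (as for vperm2f128.pd.256, an assumption for illustration); lane indices refer to the concatenation of the two source operands, and -1 here simply marks a lane the helper would instead take from a zero vector:

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<int> decodePerm2Mask(uint8_t Imm) {
  const unsigned NumElts = 4, HalfSize = NumElts / 2;
  std::vector<int> Mask(NumElts);

  // Control byte layout (see SimplifyX86vperm2 above):
  // [1:0]/[5:4] select the source half, [3]/[7] zero that half of the result.
  bool LowHalfZero = Imm & 0x08, HighHalfZero = Imm & 0x80;
  bool LowInputSelect = Imm & 0x02, HighInputSelect = Imm & 0x20;
  bool LowHalfSelect = Imm & 0x01, HighHalfSelect = Imm & 0x10;

  // Low half of the result: pick an operand (0 or 1) and one of its halves.
  unsigned Start =
      (LowInputSelect ? NumElts : 0) + (LowHalfSelect ? HalfSize : 0);
  for (unsigned i = 0; i != HalfSize; ++i)
    Mask[i] = LowHalfZero ? -1 : int(Start + i);

  // High half of the result.
  Start = (HighInputSelect ? NumElts : 0) + (HighHalfSelect ? HalfSize : 0);
  for (unsigned i = 0; i != HalfSize; ++i)
    Mask[i + HalfSize] = HighHalfZero ? -1 : int(Start + i);

  return Mask;
}

int main() {
  // Imm = 0x31: low result half = high half of operand 0,
  //             high result half = high half of operand 1.
  for (int Idx : decodePerm2Mask(0x31))
    std::printf("%d ", Idx); // prints: 2 3 6 7
  std::printf("\n");
}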
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3e2b719..fe544c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return nullptr;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : nullptr;
- Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : nullptr;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -1419,18 +1413,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
-
- if (DL) {
- unsigned AS = CI.getAddressSpace();
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- DL->getPointerSizeInBits(AS)) {
- Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
- }
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1460,32 +1451,33 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
- if (!DL)
- return commonCastTransforms(CI);
-
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL->getPointerSizeInBits(AS);
+ unsigned OffsetBits = DL.getPointerSizeInBits(AS);
APInt Offset(OffsetBits, 0);
BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() &&
- BCI &&
- GEP->accumulateConstantOffset(*DL, Offset)) {
+ if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) {
+ // FIXME: This is insufficiently tested - just a no-crash test
+ // (test/Transforms/InstCombine/2007-05-14-Crash.ll)
+ //
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(),
- Offset.getSExtValue(),
+ if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(),
NewIndices)) {
+ // FIXME: This codepath is completely untested - could be unreachable
+ // for all I know.
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds()
+ ? Builder->CreateInBoundsGEP(OrigBase, NewIndices)
+ : Builder->CreateGEP(
+ OrigBase->getType()->getPointerElementType(),
+ OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1504,16 +1496,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!DL)
- return commonPointerCastTransforms(CI);
-
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
@@ -1597,8 +1586,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned Shift,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy, InstCombiner &IC) {
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
"Shift should be a multiple of the element type size");
@@ -1614,7 +1603,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
return true;
unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
- if (IC.getDataLayout()->isBigEndian())
+ if (isBigEndian)
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
@@ -1634,7 +1623,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- Shift, Elements, VecEltTy, IC);
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1649,7 +1638,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
@@ -1662,28 +1652,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC) &&
- CollectInsertionElements(I->getOperand(1), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
@@ -1706,15 +1696,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType(), IC))
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1734,10 +1722,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+ const DataLayout &DL) {
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1760,7 +1746,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1784,7 +1770,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1839,7 +1825,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
return I;
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
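The CollectInsertionElements changes above only thread an isBigEndian flag instead of the whole InstCombiner, but the flag matters because element order in a bitcast depends on byte order. A standalone illustration (not InstCombine code) of the index flip it performs when an integer is viewed as a vector of smaller elements:

#include <cassert>

// For an integer viewed as <NumElts x iEltBits>, the chunk at bit offset
// Shift (counted from the LSB) lands in element Shift/EltBits on a
// little-endian target, and in the mirrored element on a big-endian one.
unsigned elementIndexForShift(unsigned Shift, unsigned EltBits,
                              unsigned NumElts, bool IsBigEndian) {
  unsigned Index = Shift / EltBits; // little-endian element index
  return IsBigEndian ? NumElts - Index - 1 : Index;
}

int main() {
  // Bits [16, 32) of an i64 viewed as <4 x i16>:
  assert(elementIndexForShift(16, 16, 4, /*IsBigEndian=*/false) == 1);
  assert(elementIndexForShift(16, 16, 4, /*IsBigEndian=*/true) == 2);
}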
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f48d89b..803b50a 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
- // We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && !DL)
- return nullptr;
-
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (DL)
- Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
- else if (ArrayElementCount <= 32)
- Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (DL &&
- GEPsInBounds &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
@@ -1928,8 +1919,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
- DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = nullptr;
if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
Value *RHSCIOp = RHSC->getOperand(0);
@@ -2660,8 +2651,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (DL) // Pointers require DL info to get their size.
- BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2774,8 +2765,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -3094,9 +3085,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && DL &&
- DL->getIntPtrType(RHSC->getType()) ==
- LHSI->getOperand(0)->getType())
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
@@ -3428,7 +3418,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3563,6 +3553,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
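The new (A << C) == (B << C) fold added above rests on a simple bit-level identity: a left shift by C discards exactly the top C bits, so the shifted values compare equal precisely when the operands agree in their low (width - C) bits, i.e. when (A ^ B) masked to those bits is zero. A standalone brute-force check of that identity for 8-bit values (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 1; C < 8; ++C) {
        // i8 semantics: keep only the low 8 bits after the shift.
        bool ShiftedEqual = uint8_t(A << C) == uint8_t(B << C);
        // (~0U >> C) restricted to 8 bits is 0xFF >> C: the surviving bits.
        bool MaskedXorZero = ((A ^ B) & (0xFFu >> C)) == 0;
        assert(ShiftedEqual == MaskedXorZero);
      }
  return 0;
}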
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2fd5318..fb2321d 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -158,10 +158,10 @@ private:
AssumptionCache *AC;
TargetLibraryInfo *TLI;
DominatorTree *DT;
+ const DataLayout &DL;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
- const DataLayout *DL;
LoopInfo *LI;
bool MadeIRChange;
@@ -169,7 +169,7 @@ private:
public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
- DominatorTree *DT, const DataLayout *DL, LoopInfo *LI)
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
@@ -180,7 +180,7 @@ public:
AssumptionCache *getAssumptionCache() const { return AC; }
- const DataLayout *getDataLayout() const { return DL; }
+ const DataLayout &getDataLayout() const { return DL; }
DominatorTree *getDominatorTree() const { return DT; }
@@ -330,17 +330,17 @@ private:
Type *Ty);
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+ Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -372,6 +372,10 @@ public:
/// I to the worklist, replace all uses of I with the new value, then return
/// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
@@ -423,7 +427,7 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ unsigned Depth, Instruction *CxtI) const {
return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
DT);
}
@@ -468,7 +472,7 @@ private:
/// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b9eb986..6b0f268 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -164,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- if (DL) {
- Type *IntPtrTy = DL->getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = Builder->CreateIntCast(AI.getArraySize(),
- IntPtrTy, false);
- AI.setOperand(0, V);
- return &AI;
- }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
}
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = DL
- ? DL->getIntPtrType(AI.getType())
- : Type::getInt64Ty(AI.getContext());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = { NullIdx, NullIdx };
- Instruction *GEP =
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- InsertNewInstBefore(GEP, *It);
+ IC.InsertNewInstBefore(GEP, *It);
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, GEP);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.ReplaceInstUsesWith(AI, GEP);
}
- if (DL && AI.getAllocatedType()->isSized()) {
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
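
The hunk above pulls the array-size handling into simplifyAllocaArraySize(): a scalar alloca gets the canonical i32 1 array size, and an alloca with a constant array size C becomes alloca [C x Ty] plus an inbounds GEP that every user is redirected through. A minimal standalone sketch, not part of the patch, that only builds the kind of input the helper canonicalizes (it assumes the LLVM C++ API of this tree and a trivial main() harness):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("alloca-demo", Ctx);
  IRBuilder<> B(Ctx);
  Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                 Function::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
  // alloca double, i32 8 -- after this patch InstCombine rewrites it to
  // alloca [8 x double] plus a "getelementptr inbounds ..., 0, 0" that all
  // users of the original alloca go through.
  B.CreateAlloca(B.getDoubleTy(), B.getInt32(8), "buf");
  // alloca double, i64 1 -- not an array allocation, so the array size
  // operand is canonicalized to i32 1.
  B.CreateAlloca(B.getDoubleTy(), B.getInt64(1), "scalar");
  B.CreateRetVoid();
  M.print(outs(), nullptr);
}
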
@@ -237,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -246,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -270,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -439,22 +452,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
// Try to canonicalize loads which are only ever stored to operate over
// integers instead of any other type. We only do this when the loaded type
// is sized and has a size exactly the same as its store size and the store
// size is a legal integer type.
- const DataLayout *DL = IC.getDataLayout();
- if (!Ty->isIntegerTy() && Ty->isSized() && DL &&
- DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) &&
- DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) {
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
return SI && SI->getPointerOperand() != &LI;
})) {
LoadInst *NewLoad = combineLoadToNewType(
IC, LI,
- Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty)));
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
// Replace all the stores with stores of the newly loaded value.
for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
auto *SI = cast<StoreInst>(*UI++);
@@ -489,7 +502,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
- const DataLayout *DL) {
+ const DataLayout &DL) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist(1, V);
@@ -529,7 +542,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
if (!CS)
return false;
- uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Make sure that, even if the multiplication below would wrap as an
// uint64_t, we still do the right thing.
if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
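
The widened zextOrSelf(128) multiply in the check above is what keeps the size test honest when count times element size overflows 64 bits. A standalone illustration of that failure mode; it uses the GCC/Clang unsigned __int128 extension and is not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Count = 1ull << 40;    // alloca array size
  uint64_t TypeSize = 1ull << 30; // per-element allocation size
  uint64_t MaxSize = 1024;
  // A plain 64-bit multiply wraps: 2^70 mod 2^64 == 64, which would wrongly
  // look like a small object.
  assert(Count * TypeSize <= MaxSize);
  // Widening first (the APInt(128, ...) route above) gives the right answer.
  assert((unsigned __int128)Count * TypeSize > MaxSize);
  return 0;
}
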
@@ -541,7 +554,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
return false;
- uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
if (InitSize > MaxSize)
return false;
continue;
@@ -570,8 +583,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
// offsets those indices implied.
static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
Instruction *MemI, unsigned &Idx) {
- const DataLayout *DL = IC.getDataLayout();
- if (GEPI->getNumOperands() < 2 || !DL)
+ if (GEPI->getNumOperands() < 2)
return false;
// Find the first non-zero index of a GEP. If all indices are zero, return
@@ -603,7 +615,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
if (!AllocTy || !AllocTy->isSized())
return false;
- uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
// If there are more indices after the one we might replace with a zero, make
// sure they're all non-negative. If any of them are negative, the overall
@@ -665,18 +678,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return Res;
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
- unsigned LoadAlign = LI.getAlignment();
- unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- DL->getABITypeAlignment(LI.getType());
-
- if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
- else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
- }
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
@@ -738,8 +749,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -807,6 +818,30 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: With some care we could probably handle both volatile and atomic
+ // stores here, but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (StructType *ST = dyn_cast<StructType>(T)) {
+ // If the struct only has one element, we unpack.
+ if (ST->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ return false;
+}
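
For reference, what the new unpackStoreToAggregate() amounts to for a one-element struct, written directly against IRBuilder. This is only a sketch: emitUnpackedStore is a hypothetical helper, the real path goes through combineStoreToNewValue(), and the bitcast of the pointer here stands in for the pointer-type rewrite that helper performs.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// store { T } %V, { T }* %P   ==>   %e = extractvalue { T } %V, 0
//                                   store T %e, T* (rewritten %P)
static void emitUnpackedStore(IRBuilder<> &B, Value *StructVal, Value *Ptr) {
  Value *Elt = B.CreateExtractValue(StructVal, 0);
  Value *EltPtr = B.CreateBitCast(Ptr, Elt->getType()->getPointerTo());
  B.CreateStore(Elt, EltPtr);
}
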
+
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -845,18 +880,20 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- DL->getABITypeAlignment(Val->getType());
-
- if (KnownAlign > EffectiveStoreAlign)
- SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
- SI.setAlignment(EffectiveStoreAlign);
- }
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return EraseInstFromFunction(SI);
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c48e3c9..35513f1 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -26,7 +26,7 @@ using namespace PatternMatch;
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
- IC.getAssumptionCache(), CxtI,
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
- ComputeNumSignBits(RHS, 0, CxtI);
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
- ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
if (LHSNonNegative || RHSNonNegative)
return true;
}
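
The sign-bit argument behind WillNotOverflowSignedMul can be checked numerically: per the comment above, if the operands' significant bits together stay within the result bit width minus one, the product cannot overflow. A self-contained i32 illustration in plain C++, not the LLVM helper; numSignBits is a hypothetical stand-in for ComputeNumSignBits:

#include <cstdint>
#include <cstdio>

// Leading bits that are copies of the sign bit, including the sign bit itself.
static unsigned numSignBits(int32_t X) {
  uint32_t U = uint32_t(X), Sign = U >> 31;
  unsigned N = 1;
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  int32_t LHS = 23000, RHS = -15000;
  // "Significant" bits: everything that is not a redundant copy of the sign.
  unsigned SigL = 32 - numSignBits(LHS) + 1; // 16
  unsigned SigR = 32 - numSignBits(RHS) + 1; // 15
  bool ProvedNoOverflow = SigL + SigR <= 32 - 1; // the rule quoted above
  int64_t Wide = int64_t(LHS) * int64_t(RHS);
  bool Overflowed = int64_t(int32_t(Wide)) != Wide;
  std::printf("sig bits = %u+%u, proved = %d, overflowed = %d\n", SigL, SigR,
              ProvedNoOverflow, Overflowed);
  return 0;
}
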
@@ -375,7 +375,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -422,7 +422,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
return false;
}
@@ -437,7 +437,7 @@ static bool isNormalFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isNormal())
return false;
}
@@ -780,7 +780,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1155,7 +1155,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1338,7 +1338,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1385,7 +1385,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
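
The "X urem Y -> X and Y-1" rewrite above (adding the all-ones constant is just Y - 1) relies only on Y being a power of two, which isKnownToBeAPowerOfTwo now proves with the DataLayout passed explicitly. A quick standalone check of the identity, illustration only:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0u, 1u, 7u, 100u, 0x80000001u, 0xFFFFFFFFu};
  for (unsigned Shift = 0; Shift < 32; ++Shift) {
    uint32_t Y = 1u << Shift; // a power of two
    for (uint32_t X : Xs)
      assert(X % Y == (X & (Y - 1))); // X urem Y == X and (Y - 1)
  }
  return 0;
}
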
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 0e73db8..ca2caed 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -891,8 +891,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && DL &&
- !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index dd0e65f..b28611f 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -312,9 +312,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const DataLayout *TD,
const TargetLibraryInfo *TLI,
- DominatorTree *DT, AssumptionCache *AC) {
+ const DataLayout &DL, DominatorTree *DT,
+ AssumptionCache *AC) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -326,18 +326,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
TLI, DT, AC);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
TLI, DT, AC);
}
@@ -361,14 +361,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (ConstOps.size() == I->getNumOperands()) {
if (CmpInst *C = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], TD, TLI);
+ ConstOps[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ DL, TLI);
}
}
@@ -635,25 +635,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -927,7 +927,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (C->getZExtValue() == false) {
+ if (!C->getZExtValue()) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
@@ -1203,37 +1203,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
- // select(C, select(C, a, b), c) -> select(C, a, c)
- if (TrueSI->getCondition() == CondVal) {
- if (SI.getTrueValue() == TrueSI->getTrueValue())
- return nullptr;
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
- }
- // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
- // We choose this as normal form to enable folding on the And and shortening
- // paths for the values (this helps GetUnderlyingObjects() for example).
- if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
- Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
- SI.setOperand(0, And);
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+ // We choose this as normal form to enable folding on the And and shortening
+ // paths for the values (this helps GetUnderlyingObjects() for example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
- // select(C, a, select(C, b, c)) -> select(C, a, c)
- if (FalseSI->getCondition() == CondVal) {
- if (SI.getFalseValue() == FalseSI->getFalseValue())
- return nullptr;
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
- }
- // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
- if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
- Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
- SI.setOperand(0, Or);
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
}
}
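
The new getType() guard only skips these folds when the inner and outer conditions have different types, presumably so a scalar i1 condition is never and'ed/or'ed with a vector-of-i1 one; for matching scalar conditions the identities themselves are unchanged. A standalone truth-table check in ordinary C++, not LLVM code:

#include <cassert>

int main() {
  for (int Mask = 0; Mask < 32; ++Mask) {
    bool C0 = Mask & 1, C1 = Mask & 2, A = Mask & 4, B = Mask & 8, C = Mask & 16;
    // select(C0, select(C0, a, b), c) == select(C0, a, c)
    assert((C0 ? (C0 ? A : B) : C) == (C0 ? A : C));
    // select(C0, select(C1, a, b), b) == select(C0 & C1, a, b)
    assert((C0 ? (C1 ? A : B) : B) == ((C0 && C1) ? A : B));
    // select(C0, a, select(C1, a, b)) == select(C0 | C1, a, b)
    assert((C0 ? A : (C1 ? A : B)) == ((C0 || C1) ? A : B));
  }
  return 0;
}
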
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b4976e0..a414ec6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this routine inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
- IC.getTargetLibraryInfo());
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
return V;
}
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
// Bitwise operators can all arbitrarily be evaluated shifted.
- I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select:
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
- I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
- NumBits, isLeftShift, IC));
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
return PN;
}
}
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
- return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+ return ReplaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
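
GetShiftedValue() only ever rewrites operands of operators through which a shift distributes, so the DataLayout parameter threaded through above changes plumbing, not the underlying identities. For the bitwise cases those identities are the usual ones; a standalone unsigned 32-bit spot check, illustration only:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 0x0F0Fu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      for (unsigned N : {1u, 5u, 13u, 31u}) {
        assert(((X | Y) << N) == ((X << N) | (Y << N)));
        assert(((X & Y) << N) == ((X << N) & (Y << N)));
        assert(((X ^ Y) >> N) == ((X >> N) ^ (Y >> N)));
      }
  return 0;
}
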
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c5603aa..cd391d0 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -70,8 +69,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0, &Inst);
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -84,9 +83,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth,
- dyn_cast<Instruction>(U.getUser()));
+ Value *NewVal =
+ SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
+ dyn_cast<Instruction>(U.getUser()));
if (!NewVal) return false;
U = NewVal;
return true;
@@ -122,15 +121,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((DL || !VTy->isPointerTy()) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVectorTy() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +170,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +194,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -225,9 +221,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -256,10 +252,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +290,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
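
The And/Or cases above lean on the same demanded-bits fact before and after the reformatting: a bit the user never demands can be anything, so an operand that is known-1 (for And) or known-0 (for Or) on every demanded bit drops out and the expression collapses to the other operand. A small standalone illustration:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Demanded = 0x0000FFFF; // the user only reads the low half
  uint32_t X = 0x12345678;
  uint32_t OnesWhereDemanded = 0xABCDFFFF; // known 1 on every demanded bit
  uint32_t ZeroWhereDemanded = 0xABCD0000; // known 0 on every demanded bit
  // (X & Y) is indistinguishable from X on the demanded bits ...
  assert(((X & OnesWhereDemanded) & Demanded) == (X & Demanded));
  // ... and so is (X | Z), which is why both fold to X in that context.
  assert(((X | ZeroWhereDemanded) & Demanded) == (X & Demanded));
  return 0;
}
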
@@ -336,10 +332,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne = RHSKnownOne | LHSKnownOne;
break;
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +419,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +441,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -483,8 +479,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +506,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -552,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Find information about known zero/one bits in the input.
if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
// If the RHS of the add has bits set that can't affect the input, reduce
@@ -602,9 +598,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// significant bit and all those below it.
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
}
}
@@ -619,9 +615,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
uint32_t NLZ = DemandedMask.countLeadingZeros();
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
}
@@ -662,8 +658,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
@@ -686,8 +682,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +727,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
@@ -772,8 +768,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
@@ -798,7 +794,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -808,10 +804,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
@@ -1051,7 +1047,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
@@ -1069,7 +1065,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
@@ -1097,12 +1093,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
+ UndefElts4, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
+ UndefElts3, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
@@ -1152,12 +1148,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined.
@@ -1204,7 +1200,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// div/rem demand all inputs, because they don't want divide by zero.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) {
I->setOperand(0, TmpV);
MadeChange = true;
@@ -1238,11 +1234,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Instruction::Sub:
case Instruction::Mul:
// div/rem demand all inputs, because they don't want divide by zero.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined. Consider things
@@ -1251,8 +1247,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
@@ -1273,10 +1269,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e07efb5..b6beb65 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -202,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
@@ -733,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 88fcd53..90551e4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,6 +57,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -75,7 +76,7 @@ STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -84,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have DL, we don't know if the source/dest are legal.
- if (!DL) return false;
-
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = DL->isLegalInteger(FromWidth);
- bool ToLegal = DL->isLegalInteger(ToWidth);
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -445,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout *DL, BinaryOperator &I,
+ const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
@@ -872,12 +870,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
+ SmallVectorImpl<Value *> &NewIndices) {
assert(PtrTy->isPtrOrPtrVectorTy());
- if (!DL)
- return nullptr;
-
Type *Ty = PtrTy->getPointerElementType();
if (!Ty->isSized())
return nullptr;
@@ -885,9 +880,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
@@ -905,11 +900,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -920,7 +915,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
@@ -1214,7 +1209,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1300,37 +1296,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (DL) {
- bool MadeChange = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- // Skip indices into struct types.
- SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
- if (!SeqTy) continue;
-
- // If the element type has zero size then any index over it is equivalent
- // to an index of zero, so replace it with zero if it is not zero already.
- if (SeqTy->getElementType()->isSized() &&
- DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
- MadeChange = true;
- }
+ bool MadeChange = false;
+ Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy)
+ continue;
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
- // If we are using a wider index than needed for this platform, shrink
- // it to what we need. If narrower, sign-extend it to what we need.
- // This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
}
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
}
- if (MadeChange) return &GEP;
}
+ if (MadeChange)
+ return &GEP;
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1338,6 +1334,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op1)
return nullptr;
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
signed DI = -1;
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
@@ -1345,6 +1350,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
return nullptr;
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
// Keep track of the type as we walk the GEP.
Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
@@ -1481,19 +1490,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
- GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
}
- if (DL && GEP.getNumIndices() == 1) {
+ if (GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- DL->getPointerSizeInBits(AS)) {
+ DL.getPointerSizeInBits(AS)) {
Type *PtrTy = GEP.getPointerOperandType();
Type *Ty = PtrTy->getPointerElementType();
- uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
bool Matched = false;
uint64_t C;
@@ -1562,8 +1574,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
- GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1599,9 +1611,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, Idx, GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1612,14 +1627,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (DL && SrcElTy->isArrayTy() &&
- DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- DL->getTypeAllocSize(ResElTy)) {
- Type *IdxType = DL->getIntPtrType(GEP.getType());
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1630,11 +1647,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1642,7 +1659,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1650,9 +1667,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, NewIdx, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1665,13 +1685,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
- SrcElTy->isArrayTy()) {
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize
- = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1679,7 +1698,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1688,13 +1707,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
- NewIdx
- };
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
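Both scaled-index rewrites above rest on the same divisibility check: the small-element (ResElTy) index can only be rescaled if the allocation size of the larger element is an exact multiple of ResElTy's. A stand-alone restatement with invented numbers; for i8 over [100 x double] the scale is 8, so a byte offset of 8 * %tmp2 becomes element index %tmp2:

#include <cstdint>

// Returns the index scale factor, or 0 when the rewrite does not apply.
// ArrayEltSize and ResSize play the roles of DL.getTypeAllocSize() on the
// array element type and on ResElTy in the code above.
static uint64_t elementScale(uint64_t ArrayEltSize, uint64_t ResSize) {
  if (ResSize == 0 || ArrayEltSize % ResSize != 0)
    return 0; // sizes do not divide evenly; leave the GEP alone
  return ArrayEltSize / ResSize; // e.g. 8 (double) / 1 (i8) == 8
}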
@@ -1704,9 +1722,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- if (!DL)
- return nullptr;
-
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
@@ -1727,10 +1742,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset)) {
+ GEP.accumulateConstantOffset(DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1761,7 +1776,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(Operand, NewIndices);
+ Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -2012,6 +2027,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
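The block added above treats the condition of a conditional branch with identical successors as irrelevant and replaces it with undef, so the value has one fewer use for later combines to worry about. A stand-alone sketch of the same logic (function name invented):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// "br i1 %c, label %bb, label %bb" executes %bb regardless of %c, so the
// condition can be rewritten to undef without changing behaviour.
static bool dropIrrelevantBranchCondition(BranchInst &BI) {
  if (!BI.isConditional() || BI.getSuccessor(0) != BI.getSuccessor(1) ||
      isa<UndefValue>(BI.getCondition()))
    return false;
  BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
  return true;
}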
@@ -2051,7 +2075,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
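One naming subtlety in the lines above: KnownZero has a set bit in every position proven to be zero, so counting its leading ones gives the number of leading known-zero bits of the switch condition (and likewise for KnownOne). A tiny worked example with made-up values:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // A 32-bit condition known to fit in the low 8 bits: the top 24 bits are
  // known zero, so KnownZero's top 24 bits are set.
  APInt KnownZero(32, 0xFFFFFF00u);
  assert(KnownZero.countLeadingOnes() == 24);
  return 0;
}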
@@ -2070,8 +2094,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// x86 generates redundant zero-extension instructions if the operand is
// truncated to i8 or i16.
bool TruncCond = false;
- if (DL && BitWidth > NewWidth &&
- NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ if (NewWidth > 0 && BitWidth > NewWidth &&
+ NewWidth >= DL.getLargestLegalIntTypeSize()) {
TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
@@ -2632,7 +2656,7 @@ bool InstCombiner::run() {
}
// Instruction isn't dead; see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2643,6 +2667,7 @@ bool InstCombiner::run() {
MadeIRChange = true;
continue;
}
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -2756,10 +2781,9 @@ bool InstCombiner::run() {
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSetImpl<BasicBlock*> &Visited,
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
InstCombineWorklist &ICWorklist,
- const DataLayout *DL,
const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
@@ -2797,23 +2821,22 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
continue;
}
- if (DL) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr) continue;
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
- Constant*& FoldRes = FoldedConstants[CE];
- if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
- if (!FoldRes)
- FoldRes = CE;
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
- if (FoldRes != CE) {
- *i = FoldRes;
- MadeIRChange = true;
- }
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
}
}
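With DataLayout now always available, the operand constant-folding loop above loses its "if (DL)" guard and is only reindented. Restated as a stand-alone helper for readability (name invented; ConstantFoldConstantExpression is called with the signature this patch uses, which later releases replaced):

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Fold ConstantExpr operands of Inst, memoizing results in FoldedConstants so
// an expression shared by many instructions is only folded once.
static bool foldConstantExprOperands(
    Instruction *Inst, const DataLayout &DL, const TargetLibraryInfo *TLI,
    DenseMap<ConstantExpr *, Constant *> &FoldedConstants) {
  bool Changed = false;
  for (Use &U : Inst->operands()) {
    auto *CE = dyn_cast<ConstantExpr>(U.get());
    if (!CE)
      continue;
    Constant *&FoldRes = FoldedConstants[CE];
    if (!FoldRes)
      FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
    if (!FoldRes)
      FoldRes = CE;
    if (FoldRes != CE) {
      U.set(FoldRes);
      Changed = true;
    }
  }
  return Changed;
}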
@@ -2867,7 +2890,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
///
/// This also does basic constant propagation and other forward fixing to make
/// the combiner itself run much faster.
-static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
TargetLibraryInfo *TLI,
InstCombineWorklist &ICWorklist) {
bool MadeIRChange = false;
@@ -2877,7 +2900,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
// track of which blocks we visit.
SmallPtrSet<BasicBlock *, 64> Visited;
MadeIRChange |=
- AddReachableCodeToWorklist(F.begin(), Visited, ICWorklist, DL, TLI);
+ AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -2910,12 +2933,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
return MadeIRChange;
}
-static bool combineInstructionsOverFunction(
- Function &F, InstCombineWorklist &Worklist, AssumptionCache &AC,
- TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout *DL = nullptr,
- LoopInfo *LI = nullptr) {
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, LoopInfo *LI = nullptr) {
// Minimizing size?
bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
+ auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
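The line added above, auto &DL = F.getParent()->getDataLayout();, is what makes the earlier null checks removable: DataLayout lives on the Module and is handed out by reference. A minimal sketch of a query written in that style (helper name invented):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Ask the module-owned DataLayout for the pointer width of an address space;
// no "if (DL)" is needed because the reference can never be null.
static unsigned pointerWidthInBits(const Function &F, unsigned AddrSpace) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  return DL.getPointerSizeInBits(AddrSpace);
}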
@@ -2950,15 +2974,13 @@ static bool combineInstructionsOverFunction(
PreservedAnalyses InstCombinePass::run(Function &F,
AnalysisManager<Function> *AM) {
- auto *DL = F.getParent()->getDataLayout();
-
auto &AC = AM->getResult<AssumptionAnalysis>(F);
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
auto *LI = AM->getCachedResult<LoopAnalysis>(F);
- if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI))
+ if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3007,12 +3029,10 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Optional analyses.
- auto *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- auto *DL = DLP ? &DLP->getDataLayout() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI);
+ return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
}
char InstructionCombiningPass::ID = 0;