aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/InstCombine
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h13
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp184
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp34
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp156
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp119
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp30
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp51
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp26
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h7
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp112
13 files changed, 545 insertions, 241 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index b3084cc..a5eddc2 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -158,8 +158,8 @@ public:
ConstantInt *DivRHS);
Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
- Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
@@ -178,6 +178,7 @@ public:
Instruction *visitPtrToInt(PtrToIntInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
@@ -212,8 +213,8 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
@@ -271,7 +272,7 @@ public:
if (&I == V)
V = UndefValue::get(I.getType());
- DEBUG(errs() << "IC: Replacing " << I << "\n"
+ DEBUG(dbgs() << "IC: Replacing " << I << "\n"
" with " << *V << '\n');
I.replaceAllUsesWith(V);
@@ -283,7 +284,7 @@ public:
// instruction. Instead, visit methods should return the value returned by
// this function.
Instruction *EraseInstFromFunction(Instruction &I) {
- DEBUG(errs() << "IC: ERASE " << I << '\n');
+ DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
// Make sure that we reprocess all operands now that we reduced their
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b474bd8..88bb69b 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -488,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// Convert an analysis of a masked ICmp into its equivalent if all boolean
+/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
+/// is adjacent to the corresponding normal flag (recording ==), this just
+/// involves swapping those bits over.
+static unsigned conjugateICmpMask(unsigned Mask) {
+ unsigned NewMask;
+ NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed))
+ << 1;
+
+ NewMask |=
+ (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed))
+ >> 1;
+
+ return NewMask;
+}
+
/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
/// if possible. The returned predicate is either == or !=. Returns false if
/// decomposition fails.
@@ -548,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
L21 = L22 = L1 = 0;
} else {
// Look for ANDs in the LHS icmp.
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
- L21 = L22 = 0;
- } else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
+ if (!L1->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L11 = L12 = 0;
+ } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ // Any icmp can be viewed as being trivially masked; if it allows us to
+ // remove one, it's worth it.
+ L11 = L1;
+ L12 = Constant::getAllOnesValue(L1->getType());
+ }
+
+ if (!L2->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
L21 = L22 = 0;
+ } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+ L21 = L2;
+ L22 = Constant::getAllOnesValue(L2->getType());
}
}
@@ -576,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
}
E = R2; R1 = 0; ok = true;
- } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ } else if (R1->getType()->isIntegerTy()) {
+ if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ // As before, model no mask as a trivial mask if it'll let us do an
+ // optimisation.
+ R11 = R1;
+ R12 = Constant::getAllOnesValue(R1->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R2; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -589,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
// Look for ANDs in on the right side of the RHS icmp.
- if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (!ok && R2->getType()->isIntegerTy()) {
+ if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ R11 = R2;
+ R12 = Constant::getAllOnesValue(R2->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R1; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -618,8 +658,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// foldLogOpOfMaskedICmps:
/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y)
-static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
- ICmpInst::Predicate NEWCC,
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -629,8 +668,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
- if (NEWCC == ICmpInst::ICMP_NE)
- mask >>= 1; // treat "Not"-states as normal states
+ // In full generality:
+ // (icmp (A & B) Op C) | (icmp (A & D) Op E)
+ // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+ //
+ // If the latter can be converted into (icmp (A & X) Op Y) then the former is
+ // equivalent to (icmp (A & X) !Op Y).
+ //
+ // Therefore, we can pretend for the rest of this function that we're dealing
+ // with the conjunction, provided we flip the sense of any comparisons (both
+ // input and output).
+
+ // In most cases we're going to produce an EQ for the "&&" case.
+ ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ if (!IsAnd) {
+ // Convert the masking analysis into its equivalent with negated
+ // comparisons.
+ mask = conjugateICmpMask(mask);
+ }
if (mask & FoldMskICmp_Mask_AllZeroes) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
@@ -657,6 +712,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
Value* newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
+
+ // Remaining cases assume at least that B and D are constant, and depend on
+ // their actual values. This isn't strictly, necessary, just a "handle the
+ // easy cases for now" decision.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+
+ if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() & DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (mask & FoldMskICmp_AMask_NotAllOnes) {
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() | DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
if (mask & FoldMskICmp_BMask_Mixed) {
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
@@ -665,14 +754,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
// contradict, then we can transform to
// -> (icmp eq (A & (B|D)), (C|E))
// Currently, we only handle the case of B, C, D, and E being constant.
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
// we can't simply use C and E, because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set
-
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
if (LHSCC != NEWCC)
@@ -715,7 +799,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -849,10 +933,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return RHS;
case ICmpInst::ICMP_NE:
+ // Special case to get the ordering right when the values wrap around
+ // zero.
+ if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
+ std::swap(LHSCst, RHSCst);
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
+ Val->getName()+".cmp");
}
break; // (X != 13 & X != 15) -> no change
}
@@ -1454,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return 0;
}
+/// IsOneHotValue - Returns true for "one-hot" values (values where at most
+/// one bit can be set).
+static bool IsOneHotValue(Value *V) {
+ // Match 1<<K.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Shl) {
+ ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0));
+ return One && One->isOne();
+ }
+
+ // Check for power of two integer constants.
+ if (ConstantInt *K = dyn_cast<ConstantInt>(V))
+ return K->getValue().isPowerOf2();
+
+ return false;
+}
+
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+ // if K1 and K2 are a one-bit mask.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+
+ BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
+ BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
+ if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
+ LAnd->getOpcode() == Instruction::And &&
+ RAnd->getOpcode() == Instruction::And) {
+
+ Value *Mask = 0;
+ Value *Masked = 0;
+ if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
+ IsOneHotValue(LAnd->getOperand(1)) &&
+ IsOneHotValue(RAnd->getOperand(1))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
+ Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
+ } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
+ IsOneHotValue(LAnd->getOperand(0)) &&
+ IsOneHotValue(RAnd->getOperand(0))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
+ Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
+ }
+
+ if (Masked)
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
+ }
+ }
+
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
@@ -1474,13 +1613,10 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
-
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9f74fd6..0cd7b14 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -999,20 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
- if (Callee->isDeclaration() &&
- // Conversion is ok if changing from one pointer type to another or from
- // a pointer to an integer of the same size.
- !((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
- return false; // Cannot transform this return value.
+ if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+ if (Callee->isDeclaration())
+ return false; // Cannot transform this return value.
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy() &&
- !CastInst::isBitCastable(NewRetTy, OldRetTy))
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
+ }
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
@@ -1045,9 +1040,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
- if (!CastInst::isBitCastable(ActTy, ParamTy)) {
+ if (!CastInst::isBitCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- }
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
hasAttributes(AttributeFuncs::
@@ -1063,21 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
- Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ Type *CurElTy = ActTy->getPointerElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
-
- // Converting from one pointer type to another or between a pointer and an
- // integer of the same size is safe even if we do not have a body.
- bool isConvertible = ActTy == ParamTy ||
- (TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
- if (Callee->isDeclaration() && !isConvertible)
- return false;
}
if (Callee->isDeclaration()) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 361acdd..72377dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1229,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2)))) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
@@ -1249,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // Note that we restrict this transformation based on
+ // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+ // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+ // single-precision intrinsic can be expanded in the backend.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+ (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+ Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1262,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
- Callee->getAttributes(),
- Builder->getFloatTy(),
- Builder->getFloatTy(),
- NULL);
+ Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+ M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+ Builder->getFloatTy(), Builder->getFloatTy(),
+ NULL);
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
@@ -1338,14 +1356,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
+
+ if (TD) {
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits(AS)) {
+ Type *Ty = TD->getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1370,25 +1392,32 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
+ if (!TD)
+ return commonCastTransforms(CI);
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned OffsetBits = TD->getPointerSizeInBits(AS);
+ APInt Offset(OffsetBits, 0);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
+ if (GEP->hasOneUse() &&
+ BCI &&
GEP->accumulateConstantOffset(*TD, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
+ Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OrigBase->getType(),
+ Offset.getSExtValue(),
+ NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1406,16 +1435,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
- return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
- }
+ if (!TD)
+ return commonPointerCastTransforms(CI);
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
- return commonPointerCastTransforms(CI);
+ Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
/// OptimizeVectorResize - This input value (which is known to have vector type)
@@ -1488,12 +1523,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+static bool CollectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy) {
+ Type *VecEltTy, InstCombiner &IC) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
@@ -1505,8 +1545,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (C->isNullValue())
return true;
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (IC.getDataLayout()->isBigEndian())
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
// Fail if multiple elements are inserted into this slot.
- if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex] != 0)
return false;
Elements[ElementIndex] = V;
@@ -1522,7 +1566,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- ElementIndex, Elements, VecEltTy);
+ Shift, Elements, VecEltTy, IC);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1533,10 +1577,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- i*ElementSize));
+ ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
return false;
}
return true;
@@ -1549,29 +1594,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy) &&
- CollectInsertionElements(I->getOperand(1), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC) &&
+ CollectInsertionElements(I->getOperand(1), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (CI == 0) return false;
- if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
- unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
- return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
- Elements, VecEltTy);
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
}
}
@@ -1594,12 +1638,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType()))
+ DestVecTy->getElementType(), IC))
return 0;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1785,10 +1832,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
@@ -1810,3 +1856,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ return commonCastTransforms(CI);
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c0225ae..9bb65ef 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -227,7 +227,8 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && TD == 0) return 0;
+ if (!GEP->isInBounds() && TD == 0)
+ return 0;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
@@ -393,9 +394,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ if (!GEP->isInBounds()) {
+ Type *IntPtrTy = TD->getIntPtrType(GEP->getType());
+ unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ }
// If the comparison is only true for one or two elements, emit direct
// comparisons.
@@ -562,16 +566,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType());
+ unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -593,7 +599,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -737,10 +742,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred,
- Value *TheAdd) {
+ ICmpInst::Predicate Pred) {
// If we have X+0, exit early (simplifying logic below) and let it get folded
// elsewhere. icmp X+0, X -> icmp X, X
if (CI->isZero()) {
@@ -1194,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Type *AndTy = AndCST->getType(); // Type of the and.
// We can fold this as long as we can't shift unknown bits
- // into the mask. This can only happen with signed shift
- // rights, as they sign-extend.
+ // into the mask. This can happen with signed shift
+ // rights, as they sign-extend. With logical shifts,
+ // we must still make sure the comparison is not signed
+ // because we are effectively changing the
+ // position of the sign bit (PR17827).
+ // TODO: We can relax these constraints a bit more.
if (ShAmt) {
- bool CanFold = Shift->isLogicalShift();
- if (!CanFold) {
+ bool CanFold = false;
+ unsigned ShiftOpcode = Shift->getOpcode();
+ if (ShiftOpcode == Instruction::AShr) {
// To test for the bad case of the signed shr, see if any
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
@@ -1208,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
+ } else if (ShiftOpcode == Instruction::Shl ||
+ ShiftOpcode == Instruction::LShr) {
+ CanFold = !ICI.isSigned();
}
if (CanFold) {
@@ -1781,8 +1793,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
- cast<IntegerType>(DestTy)->getBitWidth()) {
+ TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -2035,14 +2046,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
}
+/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
+/// should be swapped.
+/// The descision is based on how many times these two operands are reused
+/// as subtract operands and their positions in those instructions.
+/// The rational is that several architectures use the same instruction for
+/// both subtract and cmp, thus it is better if the order of those operands
+/// match.
+/// \return true if Op0 and Op1 should be swapped.
+static bool swapMayExposeCSEOpportunities(const Value * Op0,
+ const Value * Op1) {
+ // Filter out pointer value as those cannot appears directly in subtract.
+ // FIXME: we may want to go through inttoptrs or bitcasts.
+ if (Op0->getType()->isPointerTy())
+ return false;
+ // Count every uses of both Op0 and Op1 in a subtract.
+ // Each time Op0 is the first operand, count -1: swapping is bad, the
+ // subtract has already the same layout as the compare.
+ // Each time Op0 is the second operand, count +1: swapping is good, the
+ // subtract has a diffrent layout as the compare.
+ // At the end, if the benefit is greater than 0, Op0 should come second to
+ // expose more CSE opportunities.
+ int GlobalSwapBenefits = 0;
+ for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
+ continue;
+ // If Op0 is the first argument, this is not beneficial to swap the
+ // arguments.
+ int LocalSwapBenefits = -1;
+ unsigned Op1Idx = 1;
+ if (BinOp->getOperand(Op1Idx) == Op0) {
+ Op1Idx = 0;
+ LocalSwapBenefits = 1;
+ }
+ if (BinOp->getOperand(Op1Idx) != Op1)
+ continue;
+ GlobalSwapBenefits += LocalSwapBenefits;
+ }
+ return GlobalSwapBenefits > 0;
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned Op0Cplxity = getComplexity(Op0);
+ unsigned Op1Cplxity = getComplexity(Op1);
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
- if (getComplexity(Op0) < getComplexity(Op1)) {
+ if (Op0Cplxity < Op1Cplxity ||
+ (Op0Cplxity == Op1Cplxity &&
+ swapMayExposeCSEOpportunities(Op0, Op1))) {
I.swapOperands();
std::swap(Op0, Op1);
Changed = true;
@@ -2477,7 +2533,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2900,6 +2956,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateTrunc(B, A->getType()));
}
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ }
+ }
+
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2930,20 +3004,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
return Changed ? &I : 0;
}
-
-
-
-
-
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
///
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e2d7966..4c861b3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -180,12 +180,13 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
- Value *Idx[2];
- Idx[0] = NullIdx;
- Idx[1] = NullIdx;
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(AI.getType())
+ : Type::getInt64Ty(AI.getContext());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = { NullIdx, NullIdx };
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -262,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
EraseInstFromFunction(*ToDelete[i]);
Constant *TheSrc = cast<Constant>(Copy->getSource());
- Instruction *NewI
- = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
- AI.getType()));
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+ Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
EraseInstFromFunction(*Copy);
++NumGlobalCopies;
return NewI;
@@ -302,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
- Idxs[1] = Idxs[0];
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(SrcTy)
+ : Type::getInt64Ty(SrcTy->getContext());
+ Value *Idx = Constant::getNullValue(IdxTy);
+ Value *Idxs[2] = { Idx, Idx };
CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
@@ -315,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ (SrcPTy->isPtrOrPtrVectorTy() ==
+ LI.getType()->isPtrOrPtrVectorTy()) &&
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cc6a301..a759548 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
- if (isNormalFp(F))
- R = BinaryOperator::CreateFDiv(F, Opnd1);
+ if (FMulOrDiv->hasOneUse()) {
+ // It would otherwise introduce another div.
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ }
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
@@ -460,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
- Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
- BinaryOperator::CreateFAdd(M0, M1) :
- BinaryOperator::CreateFSub(M0, M1);
- Instruction *RI = cast<Instruction>(R);
+ Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
+ ? BinaryOperator::CreateFAdd(M0, M1)
+ : BinaryOperator::CreateFSub(M0, M1);
RI->copyFastMathFlags(&I);
return RI;
}
@@ -490,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// if pattern detected emit alternate sequence
if (OpX && OpY) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
- Instruction *FMul = cast<Instruction>(FMulVal);
- FMul->copyFastMathFlags(Log2);
- Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
- FSub->copyFastMathFlags(Log2);
- return FSub;
+ Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ FSub->takeName(&I);
+ return ReplaceInstUsesWith(I, FSub);
}
}
@@ -506,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
for (int i = 0; i < 2; i++) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -516,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
Value *T = Builder->CreateFMul(N0, Opnd1);
- cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
- Instruction *Neg = BinaryOperator::CreateFNeg(T);
- if (I.getFastMathFlags().any()) {
- cast<Instruction>(T)->copyFastMathFlags(&I);
- Neg->copyFastMathFlags(&I);
- }
- return Neg;
+ Value *Neg = Builder->CreateFNeg(T);
+ Neg->takeName(&I);
+ return ReplaceInstUsesWith(I, Neg);
}
}
@@ -545,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Y = Opnd0_0;
if (Y) {
- Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
- T->copyFastMathFlags(&I);
- T->setDebugLoc(I.getDebugLoc());
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder->CreateFMul(Opnd1, Opnd1);
- Instruction *R = BinaryOperator::CreateFMul(T, Y);
- R->copyFastMathFlags(&I);
- return R;
+ Value *R = Builder->CreateFMul(T, Y);
+ R->takeName(&I);
+ return ReplaceInstUsesWith(I, R);
}
}
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bd14e81..4c6d0c4 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -604,8 +604,6 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}
@@ -688,10 +686,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
+ DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+ );
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -772,7 +770,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
PredValues.clear();
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
@@ -792,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, TD))
+ if (Value *V = SimplifyInstruction(&PN, TD, TLI))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7581dbe..283bec2 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality())
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return 0;
Value *CmpLHS = IC->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a7bfe09..c831ddd 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return 0;
@@ -845,21 +844,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
- unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
- unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+ const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
+ const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+ if (!ShlOp1 || !ShrOp1)
+ return 0; // Noop.
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+ unsigned BitWidth = Ty->getIntegerBitWidth();
+ if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
+ return 0; // Undef.
+
+ unsigned ShlAmt = ShlOp1.getZExtValue();
+ unsigned ShrAmt = ShrOp1.getZExtValue();
KnownOne.clearAllBits();
KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
KnownZero &= DemandedMask;
- if (ShlAmt == 0 || ShrAmt == 0)
- return 0;
-
- Value *VarX = Shr->getOperand(0);
- Type *Ty = VarX->getType();
-
- APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
- APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask1(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask2(APInt::getAllOnesValue(BitWidth));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f3de6e2..1e72410 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// If we have a PHI node with a vector type that has only 2 uses: feed
-// itself and be an operand of extractelemnt at a constant location,
-// try to replace the PHI of the vector type with a PHI of a scalar type
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
@@ -282,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (SI->hasOneUse()) {
+ // TODO: For a select on vectors, it might be useful to do this if it
+ // has multiple extractelement uses. For vector select, that seems to
+ // fight the vectorizer.
+
+ // If we are extracting an element from a vector select or a select on
+ // vectors, a select on the scalars extracted from the vector arguments.
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Cond = Builder->CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
+ }
+
+ Value *V1Elem
+ = Builder->CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
+
+ Value *V2Elem
+ = Builder->CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
+ return SelectInst::Create(Cond,
+ V1Elem,
+ V2Elem,
+ SI->getName() + ".elt");
+ }
}
}
return 0;
@@ -294,7 +326,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 19959c0..f84db27 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -37,7 +37,7 @@ public:
/// in it.
void Add(Instruction *I) {
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -54,7 +54,7 @@ public:
assert(Worklist.empty() && "Worklist must be empty to add initial group");
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
- DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
WorklistMap.insert(std::make_pair(I, Idx++));
@@ -74,8 +74,7 @@ public:
}
Instruction *RemoveOne() {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
WorklistMap.erase(I);
return I;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b34ae21..191a101 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
+ // even if currently isNullValue gives false.
+ Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (InC && !isa<ConstantExpr>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else
InV = Builder->CreateSelect(PN->getIncomingValue(i),
@@ -755,19 +758,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset. If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- if (!TD) return 0;
- if (!Ty->isSized()) return 0;
+/// FindElementAtOffset - Given a pointer type and a constant offset, determine
+/// whether or not there is a sequence of GEP indices into the pointed type that
+/// will land us at the specified offset. If so, fill them into NewIndices and
+/// return the resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ assert(PtrTy->isPtrOrPtrVectorTy());
+
+ if (!TD)
+ return 0;
+
+ Type *Ty = PtrTy->getPointerElementType();
+ if (!Ty->isSized())
+ return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1176,6 +1185,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
+ // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (TD && GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+ unsigned AS = GEP.getPointerAddressSpace();
+ if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
+ GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ TD->getPointerSizeInBits(AS)) {
+ Operator *Index = cast<Operator>(GEP.getOperand(1));
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ }
+
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
@@ -1231,13 +1256,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
- Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ Type *ResElTy = PtrOp->getType()->getPointerElementType();
if (TD && SrcElTy->isArrayTy() &&
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Idx[1] = GEP.getOperand(1);
+ Type *IdxType = TD->getIntPtrType(GEP.getType());
+ Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
@@ -1261,7 +1285,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1287,8 +1311,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ uint64_t ArrayEltSize
+ = TD->getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1296,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1304,9 +1328,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *Off[2];
- Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Off[1] = NewIdx;
+ Value *Off[2] = {
+ Constant::getNullValue(TD->getIntPtrType(GEP.getType())),
+ NewIdx
+ };
+
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
@@ -1318,15 +1344,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (!TD)
+ return 0;
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) &&
+ Value *Operand = BCI->getOperand(0);
+ PointerType *OpType = cast<PointerType>(Operand->getType());
+ unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType);
+ APInt Offset(OffsetBits, 0);
+ if (!isa<BitCastInst>(Operand) &&
GEP.accumulateConstantOffset(*TD, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1335,8 +1366,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0), TLI)) {
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1347,19 +1377,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return &GEP;
}
}
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ return new BitCastInst(Operand, GEP.getType());
}
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
- Type *InTy =
- cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+ Builder->CreateInBoundsGEP(Operand, NewIndices) :
+ Builder->CreateGEP(Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1372,8 +1400,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-
-
static bool
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
const TargetLibraryInfo *TLI) {
@@ -2209,7 +2235,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
@@ -2217,7 +2243,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
@@ -2293,7 +2319,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
- DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
{
@@ -2338,7 +2364,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
MadeIRChange = true;
@@ -2348,7 +2374,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
ReplaceInstUsesWith(*I, C);
@@ -2396,13 +2422,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
std::string OrigI;
#endif
DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
- DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+ DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
- DEBUG(errs() << "IC: Old = " << *I << '\n'
+ DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
if (!I->getDebugLoc().isUnknown())
@@ -2431,7 +2457,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
- DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
#endif