diff options
Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 13 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 184 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 34 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 156 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 119 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 30 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 51 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombinePHI.cpp | 14 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 26 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 38 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h | 7 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 112 |
13 files changed, 545 insertions, 241 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index b3084cc..a5eddc2 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -158,8 +158,8 @@ public: ConstantInt *DivRHS); Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); - Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, Value *TheAdd); + Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, @@ -178,6 +178,7 @@ public: Instruction *visitPtrToInt(PtrToIntInst &CI); Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); + Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); @@ -212,8 +213,8 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices); + Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually @@ -271,7 +272,7 @@ public: if (&I == V) V = UndefValue::get(I.getType()); - DEBUG(errs() << "IC: Replacing " << I << "\n" + DEBUG(dbgs() << "IC: Replacing " << I << "\n" " with " << *V << '\n'); I.replaceAllUsesWith(V); @@ -283,7 +284,7 @@ public: // instruction. Instead, visit methods should return the value returned by // this function. Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(errs() << "IC: ERASE " << I << '\n'); + DEBUG(dbgs() << "IC: ERASE " << I << '\n'); assert(I.use_empty() && "Cannot erase instruction that is used!"); // Make sure that we reprocess all operands now that we reduced their diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b474bd8..88bb69b 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -488,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, return result; } +/// Convert an analysis of a masked ICmp into its equivalent if all boolean +/// operations had the opposite sense. Since each "NotXXX" flag (recording !=) +/// is adjacent to the corresponding normal flag (recording ==), this just +/// involves swapping those bits over. +static unsigned conjugateICmpMask(unsigned Mask) { + unsigned NewMask; + NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes | + FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed)) + << 1; + + NewMask |= + (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)) + >> 1; + + return NewMask; +} + /// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) /// if possible. The returned predicate is either == or !=. Returns false if /// decomposition fails. @@ -548,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, L21 = L22 = L1 = 0; } else { // Look for ANDs in the LHS icmp. - if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { - if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) - L21 = L22 = 0; - } else { - if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) - return 0; - std::swap(L1, L2); + if (!L1->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. + L11 = L12 = 0; + } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { + // Any icmp can be viewed as being trivially masked; if it allows us to + // remove one, it's worth it. + L11 = L1; + L12 = Constant::getAllOnesValue(L1->getType()); + } + + if (!L2->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. L21 = L22 = 0; + } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { + L21 = L2; + L22 = Constant::getAllOnesValue(L2->getType()); } } @@ -576,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, return 0; } E = R2; R1 = 0; ok = true; - } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + } else if (R1->getType()->isIntegerTy()) { + if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { + // As before, model no mask as a trivial mask if it'll let us do an + // optimisation. + R11 = R1; + R12 = Constant::getAllOnesValue(R1->getType()); + } + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { A = R11; D = R12; E = R2; ok = true; } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { @@ -589,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, return 0; // Look for ANDs in on the right side of the RHS icmp. - if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { + if (!ok && R2->getType()->isIntegerTy()) { + if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) { + R11 = R2; + R12 = Constant::getAllOnesValue(R2->getType()); + } + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { A = R11; D = R12; E = R1; ok = true; } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { @@ -618,8 +658,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, /// foldLogOpOfMaskedICmps: /// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y) -static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, - ICmpInst::Predicate NEWCC, +static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, llvm::InstCombiner::BuilderTy* Builder) { Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); @@ -629,8 +668,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); - if (NEWCC == ICmpInst::ICMP_NE) - mask >>= 1; // treat "Not"-states as normal states + // In full generality: + // (icmp (A & B) Op C) | (icmp (A & D) Op E) + // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] + // + // If the latter can be converted into (icmp (A & X) Op Y) then the former is + // equivalent to (icmp (A & X) !Op Y). + // + // Therefore, we can pretend for the rest of this function that we're dealing + // with the conjunction, provided we flip the sense of any comparisons (both + // input and output). + + // In most cases we're going to produce an EQ for the "&&" case. + ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + if (!IsAnd) { + // Convert the masking analysis into its equivalent with negated + // comparisons. + mask = conjugateICmpMask(mask); + } if (mask & FoldMskICmp_Mask_AllZeroes) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) @@ -657,6 +712,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, Value* newAnd = Builder->CreateAnd(A, newAnd1); return Builder->CreateICmp(NEWCC, newAnd, A); } + + // Remaining cases assume at least that B and D are constant, and depend on + // their actual values. This isn't strictly, necessary, just a "handle the + // easy cases for now" decision. + ConstantInt *BCst = dyn_cast<ConstantInt>(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast<ConstantInt>(D); + if (DCst == 0) return 0; + + if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { + // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) + // Only valid if one of the masks is a superset of the other (check "B&D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() & DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } + if (mask & FoldMskICmp_AMask_NotAllOnes) { + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) + // Only valid if one of the masks is a superset of the other (check "B|D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() | DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } if (mask & FoldMskICmp_BMask_Mixed) { // (icmp eq (A & B), C) & (icmp eq (A & D), E) // We already know that B & C == C && D & E == E. @@ -665,14 +754,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, // contradict, then we can transform to // -> (icmp eq (A & (B|D)), (C|E)) // Currently, we only handle the case of B, C, D, and E being constant. - ConstantInt *BCst = dyn_cast<ConstantInt>(B); - if (BCst == 0) return 0; - ConstantInt *DCst = dyn_cast<ConstantInt>(D); - if (DCst == 0) return 0; // we can't simply use C and E, because we might actually handle // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set - ConstantInt *CCst = dyn_cast<ConstantInt>(C); if (CCst == 0) return 0; if (LHSCC != NEWCC) @@ -715,7 +799,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) return V; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). @@ -849,10 +933,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 return RHS; case ICmpInst::ICMP_NE: + // Special case to get the ordering right when the values wrap around + // zero. + if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue()) + std::swap(LHSCst, RHSCst); if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1), + Val->getName()+".cmp"); } break; // (X != 13 & X != 15) -> no change } @@ -1454,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return 0; } +/// IsOneHotValue - Returns true for "one-hot" values (values where at most +/// one bit can be set). +static bool IsOneHotValue(Value *V) { + // Match 1<<K. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) + if (BO->getOpcode() == Instruction::Shl) { + ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0)); + return One && One->isOne(); + } + + // Check for power of two integer constants. + if (ConstantInt *K = dyn_cast<ConstantInt>(V)) + return K->getValue().isPowerOf2(); + + return false; +} + /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) + // if K1 and K2 are a one-bit mask. + ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); + + if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() && + RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { + + BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0)); + BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0)); + if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && + LAnd->getOpcode() == Instruction::And && + RAnd->getOpcode() == Instruction::And) { + + Value *Mask = 0; + Value *Masked = 0; + if (LAnd->getOperand(0) == RAnd->getOperand(0) && + IsOneHotValue(LAnd->getOperand(1)) && + IsOneHotValue(RAnd->getOperand(1))) { + Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); + Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); + } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && + IsOneHotValue(LAnd->getOperand(0)) && + IsOneHotValue(RAnd->getOperand(0))) { + Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); + Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); + } + + if (Masked) + return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); + } + } + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (PredicatesFoldable(LHSCC, RHSCC)) { if (LHS->getOperand(0) == RHS->getOperand(1) && @@ -1474,13 +1613,10 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // handle (roughly): // (icmp ne (A & B), C) | (icmp ne (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder)) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder)) return V; Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); - ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); - ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); - if (LHS->hasOneUse() || RHS->hasOneUse()) { // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9f74fd6..0cd7b14 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -999,20 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Check to see if we are changing the return type... if (OldRetTy != NewRetTy) { - if (Callee->isDeclaration() && - // Conversion is ok if changing from one pointer type to another or from - // a pointer to an integer of the same size. - !((OldRetTy->isPointerTy() || !TD || - OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (NewRetTy->isPointerTy() || !TD || - NewRetTy == TD->getIntPtrType(Caller->getContext())))) - return false; // Cannot transform this return value. + if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) { + if (Callee->isDeclaration()) + return false; // Cannot transform this return value. - if (!Caller->use_empty() && - // void -> non-void is handled specially - !NewRetTy->isVoidTy() && - !CastInst::isBitCastable(NewRetTy, OldRetTy)) + if (!Caller->use_empty() && + // void -> non-void is handled specially + !NewRetTy->isVoidTy()) return false; // Cannot transform this return value. + } if (!CallerPAL.isEmpty() && !Caller->use_empty()) { AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); @@ -1045,9 +1040,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Type *ParamTy = FT->getParamType(i); Type *ActTy = (*AI)->getType(); - if (!CastInst::isBitCastable(ActTy, ParamTy)) { + if (!CastInst::isBitCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - } if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). hasAttributes(AttributeFuncs:: @@ -1063,21 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; - Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); + Type *CurElTy = ActTy->getPointerElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) return false; } - - // Converting from one pointer type to another or between a pointer and an - // integer of the same size is safe even if we do not have a body. - bool isConvertible = ActTy == ParamTy || - (TD && ((ParamTy->isPointerTy() || - ParamTy == TD->getIntPtrType(Caller->getContext())) && - (ActTy->isPointerTy() || - ActTy == TD->getIntPtrType(Caller->getContext())))); - if (Callee->isDeclaration() && !isConvertible) - return false; } if (Callee->isDeclaration()) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 361acdd..72377dc 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1229,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } + // (fptrunc (select cond, R1, Cst)) --> + // (select cond, (fptrunc R1), (fptrunc Cst)) + SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)); + if (SI && + (isa<ConstantFP>(SI->getOperand(1)) || + isa<ConstantFP>(SI->getOperand(2)))) { + Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), + CI.getType()); + Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), + CI.getType()); + return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0)); if (II) { switch (II->getIntrinsicID()) { @@ -1249,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) + // Note that we restrict this transformation based on + // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because + // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the + // single-precision intrinsic can be expanded in the backend. CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && - Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) && + (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) || + Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) && Call->getNumArgOperands() == 1 && Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); @@ -1262,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", - Callee->getAttributes(), - Builder->getFloatTy(), - Builder->getFloatTy(), - NULL); + Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ? + Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) : + M->getOrInsertFunction("sqrtf", Callee->getAttributes(), + Builder->getFloatTy(), Builder->getFloatTy(), + NULL); CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); @@ -1338,14 +1356,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != - TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); - return new IntToPtrInst(P, CI.getType()); + + if (TD) { + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits(AS)) { + Type *Ty = TD->getIntPtrType(CI.getContext(), AS); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); + } } if (Instruction *I = commonCastTransforms(CI)) @@ -1370,25 +1392,32 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return &CI; } + if (!TD) + return commonCastTransforms(CI); + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. - APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); - if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) && + unsigned AS = GEP->getPointerAddressSpace(); + unsigned OffsetBits = TD->getPointerSizeInBits(AS); + APInt Offset(OffsetBits, 0); + BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); + if (GEP->hasOneUse() && + BCI && GEP->accumulateConstantOffset(*TD, Offset)) { // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); - Type *GEPIdxTy = - cast<PointerType>(OrigBase->getType())->getElementType(); + Value *OrigBase = BCI->getOperand(0); SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { + if (FindElementAtOffset(OrigBase->getType(), + Offset.getSExtValue(), + NewIndices)) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, NewIndices) : - Builder->CreateGEP(OrigBase, NewIndices); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1406,16 +1435,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); - return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); - } + if (!TD) + return commonPointerCastTransforms(CI); + + Type *Ty = CI.getType(); + unsigned AS = CI.getPointerAddressSpace(); + + if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS)) + return commonPointerCastTransforms(CI); - return commonPointerCastTransforms(CI); + Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS); + if (Ty->isVectorTy()) // Handle vectors of pointers. + PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } /// OptimizeVectorResize - This input value (which is known to have vector type) @@ -1488,12 +1523,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// insertions into the vector. See the example in the comment for /// OptimizeIntegerToVectorInsertions for the pattern this handles. /// The type of V is always a non-zero multiple of VecEltTy's size. +/// Shift is the number of bits between the lsb of V and the lsb of +/// the vector. /// /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. -static bool CollectInsertionElements(Value *V, unsigned ElementIndex, +static bool CollectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl<Value*> &Elements, - Type *VecEltTy) { + Type *VecEltTy, InstCombiner &IC) { + assert(isMultipleOfTypeSize(Shift, VecEltTy) && + "Shift should be a multiple of the element type size"); + // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; @@ -1505,8 +1545,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (C->isNullValue()) return true; + unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); + if (IC.getDataLayout()->isBigEndian()) + ElementIndex = Elements.size() - ElementIndex - 1; + // Fail if multiple elements are inserted into this slot. - if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + if (Elements[ElementIndex] != 0) return false; Elements[ElementIndex] = V; @@ -1522,7 +1566,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, // it to the right type so it gets properly inserted. if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), - ElementIndex, Elements, VecEltTy); + Shift, Elements, VecEltTy, IC); // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. @@ -1533,10 +1577,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { + unsigned ShiftI = Shift+i*ElementSize; Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), - i*ElementSize)); + ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC)) return false; } return true; @@ -1549,29 +1594,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy) && - CollectInsertionElements(I->getOperand(1), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC) && + CollectInsertionElements(I->getOperand(1), Shift, + Elements, VecEltTy, IC); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (CI == 0) return false; - if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; - unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - - return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, - Elements, VecEltTy); + Shift += CI->getZExtValue(); + if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); } } @@ -1594,12 +1638,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, - DestVecTy->getElementType())) + DestVecTy->getElementType(), IC)) return 0; // If we succeeded, we know that all of the element are specified by Elements @@ -1785,10 +1832,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. if (SVI->hasOneUse() && DestTy->isVectorTy() && - cast<VectorType>(DestTy)->getNumElements() == - SVI->getType()->getNumElements() && + DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) { + SVI->getOperand(0)->getType()->getVectorNumElements()) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow @@ -1810,3 +1856,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } + +Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + return commonCastTransforms(CI); +} diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c0225ae..9bb65ef 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -227,7 +227,8 @@ Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && TD == 0) return 0; + if (!GEP->isInBounds() && TD == 0) + return 0; Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) @@ -393,9 +394,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If the index is larger than the pointer size of the target, truncate the // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. - if (!GEP->isInBounds() && - Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) - Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); + if (!GEP->isInBounds()) { + Type *IntPtrTy = TD->getIntPtrType(GEP->getType()); + unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) + Idx = Builder->CreateTrunc(Idx, IntPtrTy); + } // If the comparison is only true for one or two elements, emit direct // comparisons. @@ -562,16 +566,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { } } + + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType()); + unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); if (Offset == 0) { // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; @@ -593,7 +599,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -737,10 +742,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". -Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, +Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, - Value *TheAdd) { + ICmpInst::Predicate Pred) { // If we have X+0, exit early (simplifying logic below) and let it get folded // elsewhere. icmp X+0, X -> icmp X, X if (CI->isZero()) { @@ -1194,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Type *AndTy = AndCST->getType(); // Type of the and. // We can fold this as long as we can't shift unknown bits - // into the mask. This can only happen with signed shift - // rights, as they sign-extend. + // into the mask. This can happen with signed shift + // rights, as they sign-extend. With logical shifts, + // we must still make sure the comparison is not signed + // because we are effectively changing the + // position of the sign bit (PR17827). + // TODO: We can relax these constraints a bit more. if (ShAmt) { - bool CanFold = Shift->isLogicalShift(); - if (!CanFold) { + bool CanFold = false; + unsigned ShiftOpcode = Shift->getOpcode(); + if (ShiftOpcode == Instruction::AShr) { // To test for the bad case of the signed shr, see if any // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); @@ -1208,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; + } else if (ShiftOpcode == Instruction::Shl || + ShiftOpcode == Instruction::LShr) { + CanFold = !ICI.isSigned(); } if (CanFold) { @@ -1781,8 +1793,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == - cast<IntegerType>(DestTy)->getBitWidth()) { + TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); @@ -2035,14 +2046,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, } +/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst +/// should be swapped. +/// The descision is based on how many times these two operands are reused +/// as subtract operands and their positions in those instructions. +/// The rational is that several architectures use the same instruction for +/// both subtract and cmp, thus it is better if the order of those operands +/// match. +/// \return true if Op0 and Op1 should be swapped. +static bool swapMayExposeCSEOpportunities(const Value * Op0, + const Value * Op1) { + // Filter out pointer value as those cannot appears directly in subtract. + // FIXME: we may want to go through inttoptrs or bitcasts. + if (Op0->getType()->isPointerTy()) + return false; + // Count every uses of both Op0 and Op1 in a subtract. + // Each time Op0 is the first operand, count -1: swapping is bad, the + // subtract has already the same layout as the compare. + // Each time Op0 is the second operand, count +1: swapping is good, the + // subtract has a diffrent layout as the compare. + // At the end, if the benefit is greater than 0, Op0 should come second to + // expose more CSE opportunities. + int GlobalSwapBenefits = 0; + for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI); + if (!BinOp || BinOp->getOpcode() != Instruction::Sub) + continue; + // If Op0 is the first argument, this is not beneficial to swap the + // arguments. + int LocalSwapBenefits = -1; + unsigned Op1Idx = 1; + if (BinOp->getOperand(Op1Idx) == Op0) { + Op1Idx = 0; + LocalSwapBenefits = 1; + } + if (BinOp->getOperand(Op1Idx) != Op1) + continue; + GlobalSwapBenefits += LocalSwapBenefits; + } + return GlobalSwapBenefits > 0; +} + Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + unsigned Op0Cplxity = getComplexity(Op0); + unsigned Op1Cplxity = getComplexity(Op1); /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. - if (getComplexity(Op0) < getComplexity(Op1)) { + if (Op0Cplxity < Op1Cplxity || + (Op0Cplxity == Op1Cplxity && + swapMayExposeCSEOpportunities(Op0, Op1))) { I.swapOperands(); std::swap(Op0, Op1); Changed = true; @@ -2477,7 +2533,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -2900,6 +2956,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Builder->CreateTrunc(B, A->getType())); } + // (A >> C) == (B >> C) --> (A^B) u< (1 << C) + // For lshr and ashr pairs. + if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || + (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { + unsigned TypeBits = Cst1->getBitWidth(); + unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + if (ShAmt < TypeBits && ShAmt != 0) { + ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE + ? ICmpInst::ICMP_UGE + : ICmpInst::ICMP_ULT; + Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); + return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); + } + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; @@ -2930,20 +3004,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Value *X; ConstantInt *Cst; // icmp X+Cst, X if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); + return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); // icmp X, X+Cst if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); + return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); } return Changed ? &I : 0; } - - - - - /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. /// Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e2d7966..4c861b3 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (TD) { - Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -180,12 +180,13 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext())); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; + Type *IdxTy = TD + ? TD->getIntPtrType(AI.getType()) + : Type::getInt64Ty(AI.getContext()); + Value *NullIdx = Constant::getNullValue(IdxTy); + Value *Idx[2] = { NullIdx, NullIdx }; Instruction *GEP = - GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); + GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original @@ -262,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) EraseInstFromFunction(*ToDelete[i]); Constant *TheSrc = cast<Constant>(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); + Instruction *NewI = ReplaceInstUsesWith(AI, Cast); EraseInstFromFunction(*Copy); ++NumGlobalCopies; return NewI; @@ -302,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { - Value *Idxs[2]; - Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); - Idxs[1] = Idxs[0]; + Type *IdxTy = TD + ? TD->getIntPtrType(SrcTy) + : Type::getInt64Ty(SrcTy->getContext()); + Value *Idx = Constant::getNullValue(IdxTy); + Value *Idxs[2] = { Idx, Idx }; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); @@ -315,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. - (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) && + (SrcPTy->isPtrOrPtrVectorTy() == + LI.getType()->isPtrOrPtrVectorTy()) && IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index cc6a301..a759548 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, } else { if (C0) { // (C0 / X) * C => (C0 * C) / X - ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C)); - if (isNormalFp(F)) - R = BinaryOperator::CreateFDiv(F, Opnd1); + if (FMulOrDiv->hasOneUse()) { + // It would otherwise introduce another div. + ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C)); + if (isNormalFp(F)) + R = BinaryOperator::CreateFDiv(F, Opnd1); + } } else { // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1)); @@ -460,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Swap && FAddSub->getOpcode() == Instruction::FSub) std::swap(M0, M1); - Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ? - BinaryOperator::CreateFAdd(M0, M1) : - BinaryOperator::CreateFSub(M0, M1); - Instruction *RI = cast<Instruction>(R); + Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd) + ? BinaryOperator::CreateFAdd(M0, M1) + : BinaryOperator::CreateFSub(M0, M1); RI->copyFastMathFlags(&I); return RI; } @@ -490,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // if pattern detected emit alternate sequence if (OpX && OpY) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); Value *FMulVal = Builder->CreateFMul(OpX, Log2); - Instruction *FMul = cast<Instruction>(FMulVal); - FMul->copyFastMathFlags(Log2); - Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX); - FSub->copyFastMathFlags(Log2); - return FSub; + Value *FSub = Builder->CreateFSub(FMulVal, OpX); + FSub->takeName(&I); + return ReplaceInstUsesWith(I, FSub); } } @@ -506,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { for (int i = 0; i < 2; i++) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); @@ -516,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Opnd0->hasOneUse()) { // -X * Y => -(X*Y) (Promote negation as high as possible) Value *T = Builder->CreateFMul(N0, Opnd1); - cast<Instruction>(T)->setDebugLoc(I.getDebugLoc()); - Instruction *Neg = BinaryOperator::CreateFNeg(T); - if (I.getFastMathFlags().any()) { - cast<Instruction>(T)->copyFastMathFlags(&I); - Neg->copyFastMathFlags(&I); - } - return Neg; + Value *Neg = Builder->CreateFNeg(T); + Neg->takeName(&I); + return ReplaceInstUsesWith(I, Neg); } } @@ -545,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Y = Opnd0_0; if (Y) { - Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1)); - T->copyFastMathFlags(&I); - T->setDebugLoc(I.getDebugLoc()); + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + Value *T = Builder->CreateFMul(Opnd1, Opnd1); - Instruction *R = BinaryOperator::CreateFMul(T, Y); - R->copyFastMathFlags(&I); - return R; + Value *R = Builder->CreateFMul(T, Y); + R->takeName(&I); + return ReplaceInstUsesWith(I, R); } } } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index bd14e81..4c6d0c4 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -604,8 +604,6 @@ namespace llvm { LHS.Width == RHS.Width; } }; - template <> - struct isPodLike<LoweredPHIRecord> { static const bool value = true; }; } @@ -688,10 +686,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // extracted out of it. First, sort the users by their offset and size. array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; - ); + DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n'; + ); // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. @@ -772,7 +770,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } PredValues.clear(); - DEBUG(errs() << " Made element PHI for offset " << Offset << ": " + DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } @@ -792,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, TD)) + if (Value *V = SimplifyInstruction(&PN, TD, TLI)) return ReplaceInstUsesWith(PN, V); // If all PHI operands are the same operation, pull them through the PHI, diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7581dbe..283bec2 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); - if (!IC || !IC->isEquality()) + if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return 0; Value *CmpLHS = IC->getOperand(0); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a7bfe09..c831ddd 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // TODO: Could compute known zero/one bits based on the input. break; } - case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); return 0; @@ -845,21 +844,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { - unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue(); - unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue(); + const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue(); + const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue(); + if (!ShlOp1 || !ShrOp1) + return 0; // Noop. + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + unsigned BitWidth = Ty->getIntegerBitWidth(); + if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) + return 0; // Undef. + + unsigned ShlAmt = ShlOp1.getZExtValue(); + unsigned ShrAmt = ShrOp1.getZExtValue(); KnownOne.clearAllBits(); KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); KnownZero &= DemandedMask; - if (ShlAmt == 0 || ShrAmt == 0) - return 0; - - Value *VarX = Shr->getOperand(0); - Type *Ty = VarX->getType(); - - APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); - APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); + APInt BitMask1(APInt::getAllOnesValue(BitWidth)); + APInt BitMask2(APInt::getAllOnesValue(BitWidth)); bool isLshr = (Shr->getOpcode() == Instruction::LShr); BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f3de6e2..1e72410 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } // If we have a PHI node with a vector type that has only 2 uses: feed -// itself and be an operand of extractelemnt at a constant location, -// try to replace the PHI of the vector type with a PHI of a scalar type +// itself and be an operand of extractelement at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type. Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. if (!PN->hasNUses(2)) @@ -282,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } + } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + if (SI->hasOneUse()) { + // TODO: For a select on vectors, it might be useful to do this if it + // has multiple extractelement uses. For vector select, that seems to + // fight the vectorizer. + + // If we are extracting an element from a vector select or a select on + // vectors, a select on the scalars extracted from the vector arguments. + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + Value *Cond = SI->getCondition(); + if (Cond->getType()->isVectorTy()) { + Cond = Builder->CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); + } + + Value *V1Elem + = Builder->CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); + + Value *V2Elem + = Builder->CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); + return SelectInst::Create(Cond, + V1Elem, + V2Elem, + SI->getName() + ".elt"); + } } } return 0; @@ -294,7 +326,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl<Constant*> &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); - unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + unsigned NumElts = V->getType()->getVectorNumElements(); if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 19959c0..f84db27 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -37,7 +37,7 @@ public: /// in it. void Add(Instruction *I) { if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { - DEBUG(errs() << "IC: ADD: " << *I << '\n'); + DEBUG(dbgs() << "IC: ADD: " << *I << '\n'); Worklist.push_back(I); } } @@ -54,7 +54,7 @@ public: assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); WorklistMap.resize(NumEntries); - DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); + DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (unsigned Idx = 0; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; WorklistMap.insert(std::make_pair(I, Idx++)); @@ -74,8 +74,7 @@ public: } Instruction *RemoveOne() { - Instruction *I = Worklist.back(); - Worklist.pop_back(); + Instruction *I = Worklist.pop_back_val(); WorklistMap.erase(I); return I; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index b34ae21..191a101 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) + // Beware of ConstantExpr: it may eventually evaluate to getNullValue, + // even if currently isNullValue gives false. + Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)); + if (InC && !isa<ConstantExpr>(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else InV = Builder->CreateSelect(PN->getIncomingValue(i), @@ -755,19 +758,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return ReplaceInstUsesWith(I, NewPN); } -/// FindElementAtOffset - Given a type and a constant offset, determine whether -/// or not there is a sequence of GEP indices into the type that will land us at -/// the specified offset. If so, fill them into NewIndices and return the -/// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices) { - if (!TD) return 0; - if (!Ty->isSized()) return 0; +/// FindElementAtOffset - Given a pointer type and a constant offset, determine +/// whether or not there is a sequence of GEP indices into the pointed type that +/// will land us at the specified offset. If so, fill them into NewIndices and +/// return the resultant element type, otherwise return null. +Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl<Value*> &NewIndices) { + assert(PtrTy->isPtrOrPtrVectorTy()); + + if (!TD) + return 0; + + Type *Ty = PtrTy->getPointerElementType(); + if (!Ty->isSized()) + return 0; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1176,6 +1185,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } + // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y)) + // The GEP pattern is emitted by the SCEV expander for certain kinds of + // pointer arithmetic. + if (TD && GEP.getNumIndices() == 1 && + match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) { + unsigned AS = GEP.getPointerAddressSpace(); + if (GEP.getType() == Builder->getInt8PtrTy(AS) && + GEP.getOperand(1)->getType()->getScalarSizeInBits() == + TD->getPointerSizeInBits(AS)) { + Operator *Index = cast<Operator>(GEP.getOperand(1)); + Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + } + } + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); @@ -1231,13 +1256,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); - Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); + Type *ResElTy = PtrOp->getType()->getPointerElementType(); if (TD && SrcElTy->isArrayTy() && - TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == + TD->getTypeAllocSize(SrcElTy->getArrayElementType()) == TD->getTypeAllocSize(ResElTy)) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Idx[1] = GEP.getOperand(1); + Type *IdxType = TD->getIntPtrType(GEP.getType()); + Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); @@ -1261,7 +1285,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1287,8 +1311,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. uint64_t ResSize = TD->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); + uint64_t ArrayEltSize + = TD->getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1296,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1304,9 +1328,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *Off[2]; - Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Off[1] = NewIdx; + Value *Off[2] = { + Constant::getNullValue(TD->getIntPtrType(GEP.getType())), + NewIdx + }; + Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); @@ -1318,15 +1344,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } + if (!TD) + return 0; + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { - APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); - if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && + Value *Operand = BCI->getOperand(0); + PointerType *OpType = cast<PointerType>(Operand->getType()); + unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType); + APInt Offset(OffsetBits, 0); + if (!isa<BitCastInst>(Operand) && GEP.accumulateConstantOffset(*TD, Offset) && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -1335,8 +1366,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa<AllocaInst>(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0), TLI)) { + if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1347,19 +1377,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return &GEP; } } - return new BitCastInst(BCI->getOperand(0), GEP.getType()); + return new BitCastInst(Operand, GEP.getType()); } // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - Type *InTy = - cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) { + if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices); + Builder->CreateInBoundsGEP(Operand, NewIndices) : + Builder->CreateGEP(Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -1372,8 +1400,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } - - static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, const TargetLibraryInfo *TLI) { @@ -2209,7 +2235,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); continue; } @@ -2217,7 +2243,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; @@ -2293,7 +2319,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; - DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); { @@ -2338,7 +2364,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, TLI)) { - DEBUG(errs() << "IC: DCE: " << *I << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; MadeIRChange = true; @@ -2348,7 +2374,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. ReplaceInstUsesWith(*I, C); @@ -2396,13 +2422,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { std::string OrigI; #endif DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); - DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { - DEBUG(errs() << "IC: Old = " << *I << '\n' + DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); if (!I->getDebugLoc().isUnknown()) @@ -2431,7 +2457,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { EraseInstFromFunction(*I); } else { #ifndef NDEBUG - DEBUG(errs() << "IC: Mod = " << OrigI << '\n' + DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); #endif |