Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 70
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 226
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 138
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 19
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 58
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 44
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 1
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 80
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 71
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 137
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 38
11 files changed, 468 insertions, 414 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d10046c..908038b 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -136,6 +136,19 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } + + // If this is a xor that was canonicalized from a sub, turn it back into + // a sub and fuse this add with it. + if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { + IntegerType *IT = cast<IntegerType>(I.getType()); + APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(XorLHS, Mask, LHSKnownZero, LHSKnownOne); + if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), + XorLHS); + } } } @@ -466,57 +479,57 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; - GetElementPtrInst *GEP = 0; - ConstantExpr *CstGEP = 0; - - // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". + GEPOperator *GEP1 = 0, *GEP2 = 0; + // For now we require one side to be the base pointer "A" or a constant - // expression derived from it. - if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) { + // GEP derived from it. + if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { // (gep X, ...) - X if (LHSGEP->getOperand(0) == RHS) { - GEP = LHSGEP; + GEP1 = LHSGEP; Swapped = false; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) { - // (gep X, ...) - (ce_gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - LHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = LHSGEP; + } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { + // (gep X, ...) - (gep X, ...) + if (LHSGEP->getOperand(0)->stripPointerCasts() == + RHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = RHSGEP; + GEP1 = LHSGEP; Swapped = false; } } } - if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) { + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // X - (gep X, ...) if (RHSGEP->getOperand(0) == LHS) { - GEP = RHSGEP; + GEP1 = RHSGEP; Swapped = true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) { - // (ce_gep X, ...) - (gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - RHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = RHSGEP; + } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { + // (gep X, ...) - (gep X, ...) + if (RHSGEP->getOperand(0)->stripPointerCasts() == + LHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = LHSGEP; + GEP1 = RHSGEP; Swapped = true; } } } - if (GEP == 0) + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and + // multiple users. + if (GEP1 == 0 || + (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) return 0; // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP); + Value *Result = EmitGEPOffset(GEP1); // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. 
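The visitAdd hunk near the top of this file's diff rests on a small arithmetic fact: when the xor constant M is a low-bit mask (M+1 is a power of two) and every bit of X outside M is known zero, xor by M equals subtraction from M. A minimal standalone C++ sketch, with hypothetical example values, that checks the identity the fold uses:

    #include <cassert>
    #include <cstdint>

    // If X has no bits outside the mask M = 2^k - 1, then X ^ M == M - X,
    // so (X ^ M) + C == (M + C) - X, which is the add+xor -> sub rewrite.
    int main() {
      const uint32_t M = 0xFF;             // 2^8 - 1, a low-bit mask
      const uint32_t C = 1000;             // the add's constant operand
      for (uint32_t X = 0; X <= M; ++X)    // X has no bits outside M
        assert((X ^ M) + C == (M + C) - X);
      return 0;
    }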
- if (CstGEP) { - Value *CstOffset = EmitGEPOffset(CstGEP); - Result = Builder->CreateSub(Result, CstOffset); + if (GEP2) { + Value *Offset = EmitGEPOffset(GEP2); + Result = Builder->CreateSub(Result, Offset); } - // If we have p - gep(p, ...) then we have to negate the result. if (Swapped) @@ -587,6 +600,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { ConstantInt *C2; if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); + + if (SimplifyDemandedInstructionBits(I)) + return &I; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 5e0bfe8..cc8f5bf 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -62,50 +63,6 @@ static inline Value *dyn_castNotVal(Value *V) { return 0; } - -/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits -/// are carefully arranged to allow folding of expressions such as: -/// -/// (A < B) | (A > B) --> (A != B) -/// -/// Note that this is only valid if the first and second predicates have the -/// same sign. Is illegal to do: (A u< B) | (A s> B) -/// -/// Three bits are used to represent the condition, as follows: -/// 0 A > B -/// 1 A == B -/// 2 A < B -/// -/// <=> Value Definition -/// 000 0 Always false -/// 001 1 A > B -/// 010 2 A == B -/// 011 3 A >= B -/// 100 4 A < B -/// 101 5 A != B -/// 110 6 A <= B -/// 111 7 Always true -/// -static unsigned getICmpCode(const ICmpInst *ICI) { - switch (ICI->getPredicate()) { - // False -> 0 - case ICmpInst::ICMP_UGT: return 1; // 001 - case ICmpInst::ICMP_SGT: return 1; // 001 - case ICmpInst::ICMP_EQ: return 2; // 010 - case ICmpInst::ICMP_UGE: return 3; // 011 - case ICmpInst::ICMP_SGE: return 3; // 011 - case ICmpInst::ICMP_ULT: return 4; // 100 - case ICmpInst::ICMP_SLT: return 4; // 100 - case ICmpInst::ICMP_NE: return 5; // 101 - case ICmpInst::ICMP_ULE: return 6; // 110 - case ICmpInst::ICMP_SLE: return 6; // 110 - // True -> 7 - default: - llvm_unreachable("Invalid ICmp predicate!"); - return 0; - } -} - /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp /// predicate into a three bit mask. It also returns whether it is an ordered /// predicate by reference. @@ -130,31 +87,19 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { default: // Not expecting FCMP_FALSE and FCMP_TRUE; llvm_unreachable("Unexpected FCmp predicate!"); - return 0; } } -/// getICmpValue - This is the complement of getICmpCode, which turns an +/// getNewICmpValue - This is the complement of getICmpCode, which turns an /// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. -static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { - CmpInst::Predicate Pred; - switch (Code) { - default: assert(0 && "Illegal ICmp code!"); - case 0: // False. - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); - case 1: Pred = Sign ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; - case 2: Pred = ICmpInst::ICMP_EQ; break; - case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; - case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; - case 5: Pred = ICmpInst::ICMP_NE; break; - case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; - case 7: // True. - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); - } - return Builder->CreateICmp(Pred, LHS, RHS); +static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + ICmpInst::Predicate NewPred; + if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) + return NewConstant; + return Builder->CreateICmp(NewPred, LHS, RHS); } /// getFCmpValue - This is the complement of getFCmpCode, which turns an @@ -165,7 +110,7 @@ static Value *getFCmpValue(bool isordered, unsigned code, InstCombiner::BuilderTy *Builder) { CmpInst::Predicate Pred; switch (code) { - default: assert(0 && "Illegal FCmp code!"); + default: llvm_unreachable("Illegal FCmp code!"); case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; @@ -180,14 +125,6 @@ static Value *getFCmpValue(bool isordered, unsigned code, return Builder->CreateFCmp(Pred, LHS, RHS); } -/// PredicatesFoldable - Return true if both predicates match sign or if at -/// least one of them is an equality comparison (which is signless). -static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { - return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || - (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || - (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); -} - // OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where // the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is // guaranteed to be a binary operator. @@ -558,6 +495,38 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, return result; } +/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) +/// if possible. The returned predicate is either == or !=. Returns false if +/// decomposition fails. +static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, + Value *&X, Value *&Y, Value *&Z) { + // X < 0 is equivalent to (X & SignBit) != 0. + if (I->getPredicate() == ICmpInst::ICMP_SLT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isZero()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_NE; + Z = C; + return true; + } + + // X > -1 is equivalent to (X & SignBit) == 0. 
+ if (I->getPredicate() == ICmpInst::ICMP_SGT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isAllOnesValue()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_EQ; + Z = ConstantInt::getNullValue(C->getType()); + return true; + } + + return false; +} + /// foldLogOpOfMaskedICmpsHelper: /// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// return the set of pattern classes (from MaskedICmpType) @@ -565,10 +534,9 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value*& B, Value*& C, Value*& D, Value*& E, - ICmpInst *LHS, ICmpInst *RHS) { - ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); - if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; - if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate &LHSCC, + ICmpInst::Predicate &RHSCC) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; // vectors are not (yet?) supported if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; @@ -582,40 +550,60 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value *L1 = LHS->getOperand(0); Value *L2 = LHS->getOperand(1); Value *L11,*L12,*L21,*L22; - if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { - if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + // Check whether the icmp can be decomposed into a bit test. + if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { + L21 = L22 = L1 = 0; + } else { + // Look for ANDs in the LHS icmp. + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); L21 = L22 = 0; + } } - else { - if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) - return 0; - std::swap(L1, L2); - L21 = L22 = 0; - } + + // Bail if LHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(LHSCC)) + return 0; Value *R1 = RHS->getOperand(0); Value *R2 = RHS->getOperand(1); Value *R11,*R12; bool ok = false; - if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R2; ok = true; + if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; + } else { + return 0; } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + E = R2; R1 = 0; ok = true; + } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R2; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R2; ok = true; } } + + // Bail if RHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(RHSCC)) + return 0; + + // Look for ANDs in on the right side of the RHS icmp. 
if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R1; ok = true; - } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R1; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R1; ok = true; - } - else + } else { return 0; + } } if (!ok) return 0; @@ -644,8 +632,12 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, ICmpInst::Predicate NEWCC, llvm::InstCombiner::BuilderTy* Builder) { Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; - unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, + LHSCC, RHSCC); if (mask == 0) return 0; + assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && + "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); if (NEWCC == ICmpInst::ICMP_NE) mask >>= 1; // treat "Not"-states as normal states @@ -693,11 +685,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *CCst = dyn_cast<ConstantInt>(C); if (CCst == 0) return 0; - if (LHS->getPredicate() != NEWCC) + if (LHSCC != NEWCC) CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast<ConstantInt>(E); if (ECst == 0) return 0; - if (RHS->getPredicate() != NEWCC) + if (RHSCC != NEWCC) ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast<ConstantInt>( ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), @@ -728,7 +720,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - return getICmpValue(isSigned, Code, Op0, Op1, Builder); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -756,24 +748,12 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } - - // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } } // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 // where CMAX is the all ones value for the truncated type, // iff the lower bits of C2 and CA are zero. - if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && LHS->hasOneUse() && RHS->hasOneUse()) { Value *V; ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; @@ -805,7 +785,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } } - + // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. 
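One concrete instance of the masked-icmp pattern foldLogOpOfMaskedICmps targets, verified by brute force in a short C++ sketch (the masks 0x01 and 0x08 are hypothetical, chosen only for illustration):

    #include <cassert>
    #include <cstdint>

    // ((A & B) == 0) & ((A & D) == 0)  -->  (A & (B | D)) == 0
    int main() {
      const uint8_t B = 0x01, D = 0x08;
      for (unsigned A = 0; A <= 0xFF; ++A) {
        bool separate = ((A & B) == 0) && ((A & D) == 0);
        bool combined = (A & (B | D)) == 0;
        assert(separate == combined);
      }
      return 0;
    }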
if (Val != Val2) return 0; @@ -1469,7 +1449,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - return getICmpValue(isSigned, Code, Op0, Op1, Builder); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -1490,18 +1470,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } - - // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) @@ -1586,7 +1554,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true return ConstantInt::getTrue(LHS->getContext()); } - break; case ICmpInst::ICMP_ULT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); @@ -2281,7 +2248,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); return ReplaceInstUsesWith(I, - getICmpValue(isSigned, Code, Op0, Op1, Builder)); + getNewICmpValue(isSigned, Code, Op0, Op1, + Builder)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 27c7c54..b550fe8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -37,26 +37,26 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); return MI; } - + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); if (MemOpLength == 0) return 0; - + // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. unsigned Size = MemOpLength->getZExtValue(); if (Size == 0) return MI; // Delete this mem transfer. - + if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. - + // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); @@ -66,7 +66,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); - + // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast // from double* to i8*. 
We'd much rather use a double load+store rather than @@ -94,20 +94,20 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } else break; } - + if (SrcETy->isSingleValueType()) { NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); } } } - - + + // If the memcpy/memmove provides better alignment info than we can // infer, use it. SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); @@ -127,7 +127,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Alignment, false)); return MI; } - + // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); @@ -135,14 +135,14 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); - + // If the length is zero, this is a no-op if (Len == 0) return MI; // memset(d,c,0,a) -> noop - + // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. - + Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); @@ -150,13 +150,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; - + // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, MI->isVolatile()); S->setAlignment(Alignment); - + // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setLength(Constant::getNullValue(LenC->getType())); return MI; @@ -165,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; } -/// visitCallInst - CallInst simplification. This mostly only handles folding +/// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. /// @@ -182,7 +182,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { CI.setDoesNotThrow(); return &CI; } - + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); @@ -203,7 +203,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // alignment is sufficient. } } - + // No other transformations apply to volatile transfers. if (MI->isVolatile()) return 0; @@ -242,13 +242,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } - + switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { // We need target data for just about everything so depend on it. if (!TD) break; - + Type *ReturnTy = CI.getType(); uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 
0 : -1ULL; @@ -324,7 +324,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); - + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) @@ -336,7 +336,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return new TruncInst(V, TI->getType()); } } - + break; case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { @@ -368,7 +368,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if ((Mask & KnownZero) == Mask) return ReplaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, TrailingZeros))); - + } break; case Intrinsic::ctlz: { @@ -387,7 +387,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if ((Mask & KnownZero) == Mask) return ReplaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, LeadingZeros))); - + } break; case Intrinsic::uadd_with_overflow: { @@ -450,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + undef -> undef if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X + 0 -> {X, false} if (RHS->isZero()) { @@ -471,7 +471,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isa<UndefValue>(II->getArgOperand(0)) || isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X - 0 -> {X, false} if (RHS->isZero()) { @@ -479,7 +479,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } @@ -528,19 +528,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X * undef -> undef if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X*0 -> {0, false} if (RHSI->isZero()) return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); - + // X * 1 -> {X, false} if (RHSI->equalsInt(1)) { Constant *V[] = { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } @@ -559,7 +559,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { - Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); @@ -570,7 +570,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. 
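The SimplifyMemSet fold in the hunk above turns a small memset into a single integer store by splatting the fill byte across a wider value with a multiply by 0x0101010101010101. A self-contained C++ sketch of that byte-splat trick, using a hypothetical fill value:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint8_t c = 0xAB;
      uint64_t fill = uint64_t(c) * 0x0101010101010101ULL; // c in every byte

      unsigned char a[8], b[8];
      std::memset(a, c, 8);       // the original 8-byte memset
      std::memcpy(b, &fill, 8);   // the single 64-bit store replacement
      assert(std::memcmp(a, b, 8) == 0);
      return 0;
    }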
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { - Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); @@ -623,19 +623,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - + if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { + assert(Mask->getType()->getVectorNumElements() == 16 && + "Bad type for intrinsic!"); + // Check that all of the elements are integer constants or undefs. bool AllEltsOk = true; for (unsigned i = 0; i != 16; ++i) { - if (!isa<ConstantInt>(Mask->getOperand(i)) && - !isa<UndefValue>(Mask->getOperand(i))) { + Constant *Elt = Mask->getAggregateElement(i); + if (Elt == 0 || + !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { AllEltsOk = false; break; } } - + if (AllEltsOk) { // Cast the input vectors to byte vectors. Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), @@ -643,23 +645,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); - + // Only extract each element once. Value *ExtractedElts[32]; memset(ExtractedElts, 0, sizeof(ExtractedElts)); - + for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getOperand(i))) + if (isa<UndefValue>(Mask->getAggregateElement(i))) continue; - unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); + unsigned Idx = + cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - + if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = + ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, Builder->getInt32(Idx&15)); } - + // Insert this value into the result vector. Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], Builder->getInt32(i)); @@ -705,7 +708,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); } } - + // Scan down this block to see if there is another stack restore in the // same block without an intervening call/alloca. BasicBlock::iterator BI = II; @@ -730,12 +733,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } } - + // If the stack restore is in a return, resume, or unwind block and if there // are no allocas or calls between the restore and the return, nuke the // restore. - if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || - isa<UnwindInst>(TI))) + if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -750,7 +752,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { return visitCallSite(&II); } -/// isSafeToEliminateVarargsCast - If this cast does not affect the value +/// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. 
static bool isSafeToEliminateVarargsCast(const CallSite CS, const CastInst * const CI, @@ -765,7 +767,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, if (!CS.isByValArgument(ix)) return true; - Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) @@ -809,7 +811,7 @@ public: } // end anonymous namespace // Try to fold some different type of calls here. -// Currently we're only working with the checking functions, memcpy_chk, +// Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { @@ -918,7 +920,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { !CalleeF->isDeclaration()) { Instruction *OldCall = CS.getInstruction(); new StoreInst(ConstantInt::getTrue(Callee->getContext()), - UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); // If OldCall dues not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. @@ -926,7 +928,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); - + // We cannot remove an invoke, because it would change the CFG, just // change the callee to a null pointer. cast<InvokeInst>(OldCall)->setCalledFunction( @@ -1063,17 +1065,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - unsigned Attrs = CallerPAL.getParamAttributes(i + 1); + Attributes Attrs = CallerPAL.getParamAttributes(i + 1); if (Attrs & Attribute::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. - + // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; - + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) @@ -1101,8 +1103,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) return false; + + // If both the callee and the cast type are varargs, we still have to make + // sure the number of fixed parameters are the same or we have the same + // ABI issues as if we introduce a varargs call. 
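The new varargs check in transformConstExprCastCall refuses to strip a cast when both function types are varargs but disagree on how many parameters are fixed. A sketch of that situation in C++ (function names are hypothetical; the cast is formed but never called, since calling through it is exactly the ABI hazard the check guards against):

    #include <cstdio>

    void callee(int a, ...) { std::printf("%d\n", a); }      // one fixed parameter
    using TwoFixed = void (*)(int, int, ...);                 // two fixed parameters

    int main() {
      TwoFixed f = reinterpret_cast<TwoFixed>(&callee);
      // A call through 'f' would pass its second argument as a fixed parameter
      // while 'callee' expects it in the variadic area, so the cast must stay.
      (void)f;
      callee(42);
      return 0;
    }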
+ if (FT->isVarArg() && + cast<FunctionType>(APTy->getElementType())->isVarArg() && + FT->getNumParams() != + cast<FunctionType>(APTy->getElementType())->getNumParams()) + return false; } - + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -1116,7 +1127,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; } - + // Okay, we decided that this is a safe thing to do: go ahead and start // inserting cast instructions as necessary. std::vector<Value*> Args; @@ -1354,11 +1365,11 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ? - NestF : ConstantExpr::getBitCast(NestF, + NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), NewAttrs.end()); @@ -1387,9 +1398,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // parameter, there is no need to adjust the argument list. Let the generic // code sort out any function type mismatches. Constant *NewCallee = - NestF->getType() == PTy ? NestF : + NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy); CS.setCalledFunction(NewCallee); return CS.getInstruction(); } - diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 46e4acd..c5ddb75 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -215,7 +215,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); - break; } Res->takeName(I); @@ -1160,6 +1159,9 @@ static Value *LookThroughFPExtensions(Value *V) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) return V; // No constant folding of this. + // See if the value can be truncated to half and then reextended. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + return V; // See if the value can be truncated to float and then reextended. if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) return V; @@ -1419,16 +1421,15 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. - SmallVector<Constant*, 16> ShuffleMask; + SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. 
V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + ShuffleMask.push_back(i); } else { // If we're increasing the number of elements, shuffle in all of the @@ -1437,14 +1438,16 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, V2 = Constant::getNullValue(SrcTy); unsigned SrcElts = SrcTy->getNumElements(); for (unsigned i = 0, e = SrcElts; i != e; ++i) - ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + ShuffleMask.push_back(i); // The excess elements reference the first element of the zero input. - ShuffleMask.append(DestTy->getNumElements()-SrcElts, - ConstantInt::get(Int32Ty, SrcElts)); + for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) + ShuffleMask.push_back(SrcElts); } - return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); + return new ShuffleVectorInst(InVal, V2, + ConstantDataVector::get(V2->getContext(), + ShuffleMask)); } static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 144b92b..5308992 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -203,8 +203,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); - if (Init == 0 || Init->getNumOperands() > 1024) return 0; + Constant *Init = GV->getInitializer(); + if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) + return 0; + + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. @@ -221,7 +225,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // structs. SmallVector<unsigned, 4> LaterIndices; - Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. @@ -272,8 +276,9 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); - for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { - Constant *Elt = Init->getOperand(i); + for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { + Constant *Elt = Init->getAggregateElement(i); + if (Elt == 0) return 0; // If this is indexing an array of structures, get the structure element. 
if (!LaterIndices.empty()) @@ -440,10 +445,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (Init->getNumOperands() <= 32 || - (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { + if (ArrayElementCount <= 32 || + (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { Type *Ty; - if (Init->getNumOperands() <= 32) + if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); else Ty = Type::getInt64Ty(Init->getContext()); @@ -566,6 +571,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I) { + // Don't transform signed compares of GEPs into index compares. Even if the + // GEP is inbounds, the final add of the base pointer can have signed overflow + // and would change the result of the icmp. + // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be + // the maximum signed value for the pointer type. + if (ICmpInst::isSigned(Cond)) + return 0; + // Look through bitcasts. if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) RHS = BCI->getOperand(0); @@ -602,6 +615,20 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), GEPLHS->getOperand(0), GEPRHS->getOperand(0)); + // If we're comparing GEPs with two base pointers that only differ in type + // and both GEPs have only constant indices or just one use, then fold + // the compare with the adjusted indices. + if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && + (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && + PtrBase->stripPointerCasts() == + GEPRHS->getOperand(0)->stripPointerCasts()) { + Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), + EmitGEPOffset(GEPLHS), + EmitGEPOffset(GEPRHS)); + return ReplaceInstUsesWith(I, Cmp); + } + // Otherwise, the base pointers are different and the indices are // different, bail out. return 0; @@ -2709,6 +2736,17 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } + } else { + // See if the RHS value is < UnsignedMin. + APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); + SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || + Pred == ICmpInst::ICMP_UGE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } } // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or @@ -2848,7 +2886,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { const fltSemantics *Sem; // FIXME: This shouldn't be here. 
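The "magic bitvector" rewrite in FoldCmpLoadFromIndexedGlobal precomputes one bit per array element recording whether that element satisfies the comparison, so the load+icmp collapses to a shift and mask. A small C++ model of the idea, with a hypothetical 8-element table and predicate (x > 3):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int table[8] = {1, 7, 3, 9, 2, 8, 4, 0};

      uint32_t magic = 0;
      for (unsigned i = 0; i != 8; ++i)
        if (table[i] > 3)
          magic |= 1u << i;                 // bit i caches "table[i] > 3"

      // table[i] > 3 becomes ((magic >> i) & 1) != 0.
      for (unsigned i = 0; i != 8; ++i)
        assert(((magic >> i) & 1) == (table[i] > 3 ? 1u : 0u));
      return 0;
    }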
- if (LHSExt->getSrcTy()->isFloatTy()) + if (LHSExt->getSrcTy()->isHalfTy()) + Sem = &APFloat::IEEEhalf; + else if (LHSExt->getSrcTy()->isFloatTy()) Sem = &APFloat::IEEEsingle; else if (LHSExt->getSrcTy()->isDoubleTy()) Sem = &APFloat::IEEEdouble; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2f82b7b..5168e2a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -256,22 +256,18 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Simplify mul instructions with a constant RHS... + // Simplify mul instructions with a constant RHS. if (Constant *Op1C = dyn_cast<Constant>(Op1)) { if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' - } else if (Op1C->getType()->isVectorTy()) { - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } - } + } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) { + // As above, vector X*splat(1.0) -> X in all defined cases. + if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue())) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); } // Try to fold constant mul into select arguments. @@ -688,28 +684,36 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } // If it's a constant vector, flip any negative values positive. - if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { - unsigned VWidth = RHSV->getNumOperands(); + if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) { + Constant *C = cast<Constant>(Op1); + unsigned VWidth = C->getType()->getVectorNumElements(); bool hasNegative = false; - for (unsigned i = 0; !hasNegative && i != VWidth; ++i) - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) + bool hasMissing = false; + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) { + hasMissing = true; + break; + } + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt)) if (RHS->isNegative()) hasNegative = true; + } - if (hasNegative) { - std::vector<Constant *> Elts(VWidth); + if (hasNegative && !hasMissing) { + SmallVector<Constant *, 16> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { + Elts[i] = C->getAggregateElement(i); // Handle undef, etc. 
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) { if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); - else - Elts[i] = RHS; } } Constant *NewRHSV = ConstantVector::get(Elts); - if (NewRHSV != RHSV) { + if (NewRHSV != C) { // Don't loop on -MININT Worklist.AddValue(I.getOperand(1)); I.setOperand(1, NewRHSV); return &I; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index f1ea8ea..e727b2c 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -184,7 +184,6 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); } llvm_unreachable("Shouldn't get here"); - return 0; } static bool isSelect01(Constant *C1, Constant *C2) { diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 702e0f2..b31049e 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -199,7 +199,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, IC.Worklist.Add(I); switch (I->getOpcode()) { - default: assert(0 && "Inconsistency with CanEvaluateShifted"); + default: llvm_unreachable("Inconsistency with CanEvaluateShifted"); case Instruction::And: case Instruction::Or: case Instruction::Xor: @@ -536,12 +536,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. // If this is oversized composite shift, then unsigned shifts get 0, ashr // saturates. 
if (AmtSum >= TypeBits) { @@ -577,7 +576,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->isExact()) { + // (X >>?,exact C1) << C2 --> X << (C2-C1) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, @@ -587,15 +595,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + // (X <<nuw C1) >>u C2 --> X >>u (C2-C1) + if (ShiftOp->hasNoUnsignedWrap()) { + BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr, + X, ShiftDiffCst); + NewLShr->setIsExact(I.isExact()); + return NewLShr; + } + Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - - // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X >>s (C2-C1) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr, + X, ShiftDiffCst); + NewAShr->setIsExact(I.isExact()); + return NewAShr; + } + } } else { assert(ShiftAmt2 < ShiftAmt1); uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; @@ -603,9 +630,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) if (I.getOpcode() == Instruction::Shl && ShiftOp->getOpcode() != Instruction::Shl) { - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, - ConstantInt::get(Ty, ShiftDiff)); - + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->isExact()) { + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; + } + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), + X, ShiftDiffCst); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); @@ -614,14 +648,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->hasNoUnsignedWrap()) { + // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(true); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoSignedWrap(true); + return NewShl; + } + } } } return 0; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 5cd9a4b..61a8e5b 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -567,9 +567,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in // the known zeros and ones. ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. + if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) { + APInt I0 = C0->getValue(); + if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) { + Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0); + return InsertNewInstWith(Xor, *I); + } + } break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -671,8 +682,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. 
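Two of the shift-pair identities the FoldShiftByConstant changes exploit can be checked exhaustively on 8-bit values. A C++ sketch with hypothetical shift amounts C1 = 2 and C2 = 5 (the nuw and exact preconditions are modeled as explicit round-trip tests):

    #include <cassert>
    #include <cstdint>

    // (X <<nuw C1) >>u C2      ==  X >>u (C2 - C1)
    // (X >>u,exact C1) << C2   ==  X << (C2 - C1)
    int main() {
      const unsigned C1 = 2, C2 = 5;
      for (unsigned X = 0; X <= 0xFF; ++X) {
        if (((X << C1) & 0xFF) >> C1 == X)          // shl loses no bits (nuw)
          assert((((X << C1) & 0xFF) >> C2) == (X >> (C2 - C1)));
        if ((X >> C1) << C1 == X)                   // lshr loses no bits (exact)
          assert((((X >> C1) << C2) & 0xFF) == ((X << (C2 - C1)) & 0xFF));
      }
      return 0;
    }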
- Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA, I->getName()); + BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), + SA, I->getName()); + NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); return InsertNewInstWith(NewVal, *I); } else if ((KnownOne & SignBit) != 0) { // New bits are known one. KnownOne |= HighBits; @@ -822,46 +834,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } UndefElts = 0; - if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + + // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential. + if (Constant *C = dyn_cast<Constant>(V)) { + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. + if (DemandedElts.isAllOnesValue()) + return 0; + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); - - std::vector<Constant*> Elts; - for (unsigned i = 0; i != VWidth; ++i) + + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); UndefElts.setBit(i); - } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. + continue; + } + + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) return 0; + + if (isa<UndefValue>(Elt)) { // Already undef. Elts.push_back(Undef); UndefElts.setBit(i); } else { // Otherwise, defined. - Elts.push_back(CV->getOperand(i)); + Elts.push_back(Elt); } - - // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CV ? NewCP : 0; - } - - if (isa<ConstantAggregateZero>(V)) { - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. - - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts.isAllOnesValue()) - return 0; - - Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); - std::vector<Constant*> Elts; - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elt = DemandedElts[i] ? Zero : Undef; - Elts.push_back(Elt); } - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); + + // If we changed the constant, return it. + Constant *NewCV = ConstantVector::get(Elts); + return NewCV != C ? NewCV : 0; } // Limit search depth. @@ -977,7 +982,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (NewUndefElts) { // Add additional discovered undefs. - std::vector<Constant*> Elts; + SmallVector<Constant*, 16> Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 0995d46..cf60f0f 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -16,16 +16,16 @@ using namespace llvm; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. +/// is to leave as a vector operation. isConstant indicates whether we're +/// extracting one known element. If false we're extracting a variable index. 
 static bool CheapToScalarize(Value *V, bool isConstant) {
-  if (isa<ConstantAggregateZero>(V))
-    return true;
-  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+  if (Constant *C = dyn_cast<Constant>(V)) {
     if (isConstant) return true;
-    // If all elts are the same, we can extract.
-    Constant *Op0 = C->getOperand(0);
-    for (unsigned i = 1; i < C->getNumOperands(); ++i)
-      if (C->getOperand(i) != Op0)
+
+    // If all elts are the same, we can extract it and use any of the values.
+    Constant *Op0 = C->getAggregateElement(0U);
+    for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
+      if (C->getAggregateElement(i) != Op0)
         return false;
     return true;
   }
@@ -53,41 +53,18 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
   return false;
 }

-/// getShuffleMask - Read and decode a shufflevector mask.
-/// Turn undef elements into negative values.
-static SmallVector<int, 16> getShuffleMask(const ShuffleVectorInst *SVI) {
-  unsigned NElts = SVI->getType()->getNumElements();
-  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
-    return SmallVector<int, 16>(NElts, 0);
-  if (isa<UndefValue>(SVI->getOperand(2)))
-    return SmallVector<int, 16>(NElts, -1);
-
-  SmallVector<int, 16> Result;
-  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
-  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
-    if (isa<UndefValue>(*i))
-      Result.push_back(-1);  // undef
-    else
-      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
-  return Result;
-}
-
 /// FindScalarElement - Given a vector and an element number, see if the scalar
 /// value is already around as a register, for example if it were inserted then
 /// extracted from the vector.
 static Value *FindScalarElement(Value *V, unsigned EltNo) {
   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
-  VectorType *PTy = cast<VectorType>(V->getType());
-  unsigned Width = PTy->getNumElements();
+  VectorType *VTy = cast<VectorType>(V->getType());
+  unsigned Width = VTy->getNumElements();
   if (EltNo >= Width)  // Out of range access.
-    return UndefValue::get(PTy->getElementType());
+    return UndefValue::get(VTy->getElementType());

-  if (isa<UndefValue>(V))
-    return UndefValue::get(PTy->getElementType());
-  if (isa<ConstantAggregateZero>(V))
-    return Constant::getNullValue(PTy->getElementType());
-  if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
-    return CP->getOperand(EltNo);
+  if (Constant *C = dyn_cast<Constant>(V))
+    return C->getAggregateElement(EltNo);

   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert to a variable element, we don't know what it is.
@@ -106,11 +83,10 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   }

   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
-    unsigned LHSWidth =
-      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
     int InEl = SVI->getMaskValue(EltNo);
     if (InEl < 0)
-      return UndefValue::get(PTy->getElementType());
+      return UndefValue::get(VTy->getElementType());
     if (InEl < (int)LHSWidth)
       return FindScalarElement(SVI->getOperand(0), InEl);
     return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
@@ -121,27 +97,11 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
 }

 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-  // If vector val is undef, replace extract with scalar undef.
-  if (isa<UndefValue>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
-  // If vector val is constant 0, replace extract with scalar 0.
-  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
-  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If vector val is constant with all elements the same, replace EI with
-    // that element. When the elements are not identical, we cannot replace yet
-    // (we do that below, but only when the index is constant).
-    Constant *op0 = C->getOperand(0);
-    for (unsigned i = 1; i != C->getNumOperands(); ++i)
-      if (C->getOperand(i) != op0) {
-        op0 = 0;
-        break;
-      }
-    if (op0)
-      return ReplaceInstUsesWith(EI, op0);
-  }
+  // If vector val is constant with all elements the same, replace EI with
+  // that element. We handle a known element # below.
+  if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
+    if (CheapToScalarize(C, false))
+      return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));

   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
@@ -175,8 +135,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       // the same number of elements, see if we can find the source element from
       // it. In this case, we will end up needing to bitcast the scalars.
       if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
-        if (VectorType *VT =
-            dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
           if (VT->getNumElements() == VectorWidth)
             if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
               return new BitCastInst(Elt, EI.getType());
@@ -215,7 +174,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
         int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
         Value *Src;
         unsigned LHSWidth =
-          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+          SVI->getOperand(0)->getType()->getVectorNumElements();

         if (SrcIdx < 0)
           return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -248,7 +207,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
 /// elements from either LHS or RHS, return the shuffle mask and true.
 /// Otherwise, return false.
 static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
-                                         std::vector<Constant*> &Mask) {
+                                         SmallVectorImpl<Constant*> &Mask) {
   assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
          "Invalid CollectSingleShuffleElements");
   unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
@@ -325,7 +284,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
 /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
 /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
 /// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
                                      Value *&RHS) {
   assert(V->getType()->isVectorTy() &&
          (RHS == 0 || V->getType() == RHS->getType()) &&
@@ -335,10 +294,14 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
   if (isa<UndefValue>(V)) {
     Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
     return V;
-  } else if (isa<ConstantAggregateZero>(V)) {
+  }
+
+  if (isa<ConstantAggregateZero>(V)) {
     Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
     return V;
-  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+  }
+
+  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert of an extract from some other vector, include it.
     Value *VecOp = IEI->getOperand(0);
     Value *ScalarOp = IEI->getOperand(1);
@@ -421,7 +384,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     // If this insertelement isn't used by some other insertelement, turn it
     // (and any insertelements it points to), into one big shuffle.
     if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
-      std::vector<Constant*> Mask;
+      SmallVector<Constant*, 16> Mask;
       Value *RHS = 0;
       Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
       if (RHS == 0) RHS = UndefValue::get(LHS->getType());
@@ -447,7 +410,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
-  SmallVector<int, 16> Mask = getShuffleMask(&SVI);
+  SmallVector<int, 16> Mask = SVI.getShuffleMask();

   bool MadeChange = false;

@@ -480,20 +443,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       }

       // Remap any references to RHS to use LHS.
-      std::vector<Constant*> Elts;
+      SmallVector<Constant*, 16> Elts;
       for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
-        if (Mask[i] < 0)
+        if (Mask[i] < 0) {
           Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
-        else {
-          if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
-              (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
-            Mask[i] = -1;     // Turn into undef.
-            Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
-          } else {
-            Mask[i] = Mask[i] % e;  // Force to LHS.
-            Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
-                                            Mask[i]));
-          }
+          continue;
+        }
+
+        if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+            (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+          Mask[i] = -1;     // Turn into undef.
+          Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+        } else {
+          Mask[i] = Mask[i] % e;  // Force to LHS.
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+                                          Mask[i]));
         }
       }
       SVI.setOperand(0, SVI.getOperand(1));
@@ -618,12 +582,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   SmallVector<int, 16> LHSMask;
   SmallVector<int, 16> RHSMask;
-  if (newLHS != LHS) {
-    LHSMask = getShuffleMask(LHSShuffle);
-  }
-  if (RHSShuffle && newRHS != RHS) {
-    RHSMask = getShuffleMask(RHSShuffle);
-  }
+  if (newLHS != LHS)
+    LHSMask = LHSShuffle->getShuffleMask();
+  if (RHSShuffle && newRHS != RHS)
+    RHSMask = RHSShuffle->getShuffleMask();
+
   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
   SmallVector<int, 16> newMask;
   bool isSplat = true;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index af065cd..318256a 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -495,7 +495,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
   if (ConstantInt *C = dyn_cast<ConstantInt>(V))
     return ConstantExpr::getNeg(C);

-  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
     if (C->getType()->getElementType()->isIntegerTy())
       return ConstantExpr::getNeg(C);

@@ -514,7 +514,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
   if (ConstantFP *C = dyn_cast<ConstantFP>(V))
     return ConstantExpr::getFNeg(C);

-  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
     if (C->getType()->getElementType()->isFloatingPointTy())
       return ConstantExpr::getFNeg(C);

@@ -1247,13 +1247,13 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
       // change 'switch (X+4) case 1:' into 'switch (X) case -3'
       unsigned NumCases = SI.getNumCases();
       // Skip the first item since that's the default case.
-      for (unsigned i = 1; i < NumCases; ++i) {
+      for (unsigned i = 0; i < NumCases; ++i) {
         ConstantInt* CaseVal = SI.getCaseValue(i);
         Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
                                                     AddRHS);
         assert(isa<ConstantInt>(NewCaseVal) &&
                "Result of expression should be constant");
-        SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal));
+        SI.setCaseValue(i, cast<ConstantInt>(NewCaseVal));
       }
       SI.setCondition(I->getOperand(0));
       Worklist.Add(I);
@@ -1270,24 +1270,16 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
     return ReplaceInstUsesWith(EV, Agg);

   if (Constant *C = dyn_cast<Constant>(Agg)) {
-    if (isa<UndefValue>(C))
-      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
-    if (isa<ConstantAggregateZero>(C))
-      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
-    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
-      // Extract the element indexed by the first index out of the constant
-      Value *V = C->getOperand(*EV.idx_begin());
-      if (EV.getNumIndices() > 1)
-        // Extract the remaining indices out of the constant indexed by the
-        // first index
-        return ExtractValueInst::Create(V, EV.getIndices().slice(1));
-      else
-        return ReplaceInstUsesWith(EV, V);
+    if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) {
+      if (EV.getNumIndices() == 0)
+        return ReplaceInstUsesWith(EV, C2);
+      // Extract the remaining indices out of the constant indexed by the
+      // first index
+      return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
     }
     return 0; // Can't handle other constants
-  }
+  }
+
   if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
     // We're extracting from an insertvalue instruction, compare the indices
     const unsigned *exti, *exte, *insi, *inse;
@@ -1881,15 +1873,15 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
         if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
           // See if this is an explicit destination.
-          for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+          for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
             if (SI->getCaseValue(i) == Cond) {
-              BasicBlock *ReachableBB = SI->getSuccessor(i);
+              BasicBlock *ReachableBB = SI->getCaseSuccessor(i);
               Worklist.push_back(ReachableBB);
               continue;
             }

           // Otherwise it is the default destination.
-          Worklist.push_back(SI->getSuccessor(0));
+          Worklist.push_back(SI->getDefaultDest());
           continue;
         }
       }
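The shift and subtraction folds in this patch rest on two small arithmetic identities: a left shift that cannot wrap (nuw) followed by a logical right shift by a smaller amount collapses to a single left shift by the difference, and subtracting X from an all-ones constant 2^n-1 equals XOR whenever X has no bits set outside that mask. The standalone C++ check below is not part of the patch; it is only a sketch that verifies both identities exhaustively for 8-bit values.

#include <cassert>

int main() {
  // Identity behind (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2):
  // if x << c1 stays within 8 bits (no unsigned wrap), then for any c2 <= c1
  // shifting back right by c2 equals shifting left by c1-c2 directly.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c1 = 0; c1 < 8; ++c1) {
      if ((x << c1) > 0xFF)
        continue;                 // the shift would wrap: the nuw fold does not apply
      for (unsigned c2 = 0; c2 <= c1; ++c2)
        assert(((x << c1) >> c2) == (x << (c1 - c2)));
    }

  // Identity behind the SimplifyDemandedUseBits sub->xor fold:
  // (2^n - 1) - X == (2^n - 1) ^ X whenever X has no bits outside the mask,
  // i.e. its remaining bits are known zero.
  for (unsigned n = 1; n <= 8; ++n) {
    unsigned mask = (1u << n) - 1;
    for (unsigned x = 0; x <= mask; ++x)
      assert((mask - x) == (mask ^ x));
  }
  return 0;
}

The same reasoning explains why the unflagged cases in the patch still emit an explicit And mask: without nuw, nsw, or exact, bits can be shifted out or in, and a single shift by the difference would not be equivalent.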