Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 10
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 22
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 206
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 8
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 15
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 3
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 73
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 50
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 12
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 123
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 101
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 20
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 285
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 14
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Instrumentation/DebugIR.cpp | 310
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 2
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 66
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 243
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 443
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 43
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 99
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 5
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 23
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 46
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 31
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 915
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 5
-rw-r--r--  lib/Transforms/Vectorize/VecUtils.cpp | 226
-rw-r--r--  lib/Transforms/Vectorize/VecUtils.h | 28
36 files changed, 2547 insertions(+), 904 deletions(-)
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index b63495b..a7bf188 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -68,10 +69,11 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
if (LLVMUsed == 0) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
- for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
- dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
- UsedValues.insert(GV);
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ GlobalValue *GV = cast<GlobalValue>(Operand);
+ UsedValues.insert(GV);
+ }
}
// True if A is better than B.
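A small IR sketch (not part of the patch, names invented) of the case the new call is for: an alias listed in llvm.used should be recorded as the alias itself, which stripPointerCastsNoFollowAliases guarantees, whereas plain stripPointerCasts may also look through the alias to its aliasee:

    @real = global i32 0
    @a = alias i32* @real
    @llvm.used = appending global [1 x i8*]
                 [i8* bitcast (i32* @a to i8*)], section "llvm.metadata"

With the new code, FindUsedValues inserts @a rather than @real into UsedValues.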
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 49ef1e7..3fdb5f0 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -343,8 +343,9 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.isDeclaration() || Fn.mayBeOverridden())
return false;
- // Functions with local linkage should already have been handled.
- if (Fn.hasLocalLinkage())
+ // Functions with local linkage should already have been handled, except the
+ // fragile (variadic) ones which we can improve here.
+ if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
if (Fn.use_empty())
@@ -604,9 +605,20 @@ void DAE::SurveyFunction(const Function &F) {
UseVector MaybeLiveArgUses;
for (Function::const_arg_iterator AI = F.arg_begin(),
E = F.arg_end(); AI != E; ++AI, ++i) {
- // See what the effect of this use is (recording any uses that cause
- // MaybeLive in MaybeLiveArgUses).
- Liveness Result = SurveyUses(AI, MaybeLiveArgUses);
+ Liveness Result;
+ if (F.getFunctionType()->isVarArg()) {
+ // Variadic functions will already have a va_arg function expanded inside
+ // them, making them potentially very sensitive to ABI changes resulting
+ // from removing arguments entirely, so don't. For example AArch64 handles
+ // register and stack HFAs very differently, and this is reflected in the
+ // IR which has already been generated.
+ Result = Live;
+ } else {
+ // See what the effect of this use is (recording any uses that cause
+ // MaybeLive in MaybeLiveArgUses).
+ Result = SurveyUses(AI, MaybeLiveArgUses);
+ }
+
// Mark the result.
MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
// Clear the vector again for the next iteration.
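As an invented sketch of what is now left alone: a local variadic function keeps even its unused fixed arguments, because dropping one would shift how the already-expanded va_arg sequence and the target's argument registers line up:

    define internal void @log(i32 %unused_level, ...) {
      ; the target-specific va_arg lowering would normally appear here
      ret void
    }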
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index b035a82..a4de71b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3041,8 +3041,168 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
+/// \brief Given "llvm.used" or "llvm.compiler_used" as a global name, collect
+/// the initializer elements of that global in Set and return the global itself.
+static GlobalVariable *
+collectUsedGlobalVariables(const Module &M, const char *Name,
+ SmallPtrSet<GlobalValue *, 8> &Set) {
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+ if (!GV || !GV->hasInitializer())
+ return GV;
+
+ const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) {
+ Value *Op = Init->getOperand(I);
+ GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ Set.insert(G);
+ }
+ return GV;
+}
+
+static int compareNames(const void *A, const void *B) {
+ const GlobalValue *VA = *reinterpret_cast<GlobalValue* const*>(A);
+ const GlobalValue *VB = *reinterpret_cast<GlobalValue* const*>(B);
+ if (VA->getName() < VB->getName())
+ return -1;
+ if (VB->getName() < VA->getName())
+ return 1;
+ return 0;
+}
+
+static void setUsedInitializer(GlobalVariable &V,
+ SmallPtrSet<GlobalValue *, 8> Init) {
+ SmallVector<llvm::Constant *, 8> UsedArray;
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
+
+ for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
+ I != E; ++I) {
+ Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
+ UsedArray.push_back(Cast);
+ }
+ // Sort to get deterministic order.
+ array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
+ ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
+
+ Module *M = V.getParent();
+ V.removeFromParent();
+ GlobalVariable *NV =
+ new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray), "");
+ NV->takeName(&V);
+ NV->setSection("llvm.metadata");
+ delete &V;
+}
+
+namespace {
+/// \brief An easy to access representation of llvm.used and llvm.compiler_used.
+class LLVMUsed {
+ SmallPtrSet<GlobalValue *, 8> Used;
+ SmallPtrSet<GlobalValue *, 8> CompilerUsed;
+ GlobalVariable *UsedV;
+ GlobalVariable *CompilerUsedV;
+
+public:
+ LLVMUsed(const Module &M) {
+ UsedV = collectUsedGlobalVariables(M, "llvm.used", Used);
+ CompilerUsedV =
+ collectUsedGlobalVariables(M, "llvm.compiler_used", CompilerUsed);
+ }
+ typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
+ iterator usedBegin() { return Used.begin(); }
+ iterator usedEnd() { return Used.end(); }
+ iterator compilerUsedBegin() { return CompilerUsed.begin(); }
+ iterator compilerUsedEnd() { return CompilerUsed.end(); }
+ bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
+ bool compilerUsedCount(GlobalValue *GV) const {
+ return CompilerUsed.count(GV);
+ }
+ bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
+ bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
+ bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
+ bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
+
+ void syncVariablesAndSets() {
+ if (UsedV)
+ setUsedInitializer(*UsedV, Used);
+ if (CompilerUsedV)
+ setUsedInitializer(*CompilerUsedV, CompilerUsed);
+ }
+};
+}
+
+static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
+ if (GA.use_empty()) // No use at all.
+ return false;
+
+ assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler_used");
+ if (!GA.hasOneUse())
+ // Strictly more than one use. So at least one is not in llvm.used and
+ // llvm.compiler_used.
+ return true;
+
+ // Exactly one use. Check if it is in llvm.used or llvm.compiler_used.
+ return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
+}
+
+static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
+ const LLVMUsed &U) {
+ unsigned N = 2;
+ assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
+ "We should have removed the duplicated "
+ "element from llvm.compiler_used");
+ if (U.usedCount(&V) || U.compilerUsedCount(&V))
+ ++N;
+ return V.hasNUsesOrMore(N);
+}
+
+static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
+ if (!GA.hasLocalLinkage())
+ return true;
+
+ return U.usedCount(&GA) || U.compilerUsedCount(&GA);
+}
+
+static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
+ RenameTarget = false;
+ bool Ret = false;
+ if (hasUseOtherThanLLVMUsed(GA, U))
+ Ret = true;
+
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!mayHaveOtherReferences(GA, U))
+ return Ret;
+
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ Constant *Aliasee = GA.getAliasee();
+ GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ if (!Target->hasLocalLinkage())
+ return Ret;
+
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
+ return Ret;
+
+ RenameTarget = true;
+ return true;
+}
+
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
+ LLVMUsed Used(M);
+
+ for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
+ E = Used.usedEnd();
+ I != E; ++I)
+ Used.compilerUsedErase(*I);
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
@@ -3057,37 +3217,29 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
Constant *Aliasee = J->getAliasee();
GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
Target->removeDeadConstantUsers();
- bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
// Make all users of the alias use the aliasee instead.
- if (!J->use_empty()) {
- J->replaceAllUsesWith(Aliasee);
- ++NumAliasesResolved;
- Changed = true;
- }
-
- // If the alias is externally visible, we may still be able to simplify it.
- if (!J->hasLocalLinkage()) {
- // If the aliasee has internal linkage, give it the name and linkage
- // of the alias, and delete the alias. This turns:
- // define internal ... @f(...)
- // @a = alias ... @f
- // into:
- // define ... @a(...)
- if (!Target->hasLocalLinkage())
- continue;
+ bool RenameTarget;
+ if (!hasUsesToReplace(*J, Used, RenameTarget))
+ continue;
- // Do not perform the transform if multiple aliases potentially target the
- // aliasee. This check also ensures that it is safe to replace the section
- // and other attributes of the aliasee with those of the alias.
- if (!hasOneUse)
- continue;
+ J->replaceAllUsesWith(Aliasee);
+ ++NumAliasesResolved;
+ Changed = true;
+ if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(J);
Target->setLinkage(J->getLinkage());
Target->GlobalValue::copyAttributesFrom(J);
- }
+
+ if (Used.usedErase(J))
+ Used.usedInsert(Target);
+
+ if (Used.compilerUsedErase(J))
+ Used.compilerUsedInsert(Target);
+ } else if (mayHaveOtherReferences(*J, Used))
+ continue;
// Delete the alias.
M.getAliasList().erase(J);
@@ -3095,6 +3247,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
Changed = true;
}
+ Used.syncVariablesAndSets();
+
return Changed;
}
@@ -3223,8 +3377,6 @@ bool GlobalOpt::runOnModule(Module &M) {
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3242,7 +3394,9 @@ bool GlobalOpt::runOnModule(Module &M) {
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M);
- // Try to remove trivial global destructors.
+ // Try to remove trivial global destructors if they are not removed
+ // already.
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
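The rename path that survives the refactoring can be pictured with a minimal IR sketch (names invented), matching the comment in hasUsesToReplace: once every other use of the alias is rewritten, an internally linked aliasee takes over the alias's name and linkage and the alias is deleted:

    ; before
    define internal void @f() {
      ret void
    }
    @a = alias void ()* @f

    ; after OptimizeGlobalAliases, assuming @f has no other relevant uses
    define void @a() {
      ret void
    }

The new llvm.used/llvm.compiler_used bookkeeping then just moves the set entry from the deleted alias to the renamed target so the arrays stay consistent.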
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 986c0b8..8ed7704 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -32,6 +32,12 @@ static cl::opt<bool>
RunLoopVectorization("vectorize-loops",
cl::desc("Run the Loop vectorization passes"));
+// This is a helper flag that we use for testing the profitability of
+// vectorization on -O2 and -Os. It should go away once we make a decision.
+static cl::opt<bool>
+VectorizeO2("vectorize-o2",
+ cl::desc("Enable vectorization on all O levels"));
+
static cl::opt<bool>
RunSLPVectorization("vectorize-slp",
cl::desc("Run the SLP vectorization passes"));
@@ -192,7 +198,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (LoopVectorize && OptLevel > 2)
+ if (LoopVectorize && (OptLevel > 2 || VectorizeO2))
MPM.add(createLoopVectorizePass());
if (!DisableUnrollLoops)
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 3396f79..754eff6 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -332,16 +332,6 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
-/// printer to not emit usual symbol prefix before the symbol name is used then
-/// return linkage name after skipping this special LLVM prefix.
-static StringRef getRealLinkageName(StringRef LinkageName) {
- char One = '\1';
- if (LinkageName.startswith(StringRef(&One, 1)))
- return LinkageName.substr(1);
- return LinkageName;
-}
-
bool StripDeadDebugInfo::runOnModule(Module &M) {
bool Changed = false;
@@ -401,9 +391,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
StringRef FName = DISubprogram(*I).getLinkageName();
if (FName.empty())
FName = DISubprogram(*I).getName();
- if (NamedMDNode *LVNMD =
- M.getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(FName))))
+ if (NamedMDNode *LVNMD = M.getNamedMetadata(
+ "llvm.dbg.lv." + Function::getRealLinkageName(FName)))
LVNMD->eraseFromParent();
}
}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 2a36074..b3084cc 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -1,4 +1,4 @@
-//===- InstCombine.h - Main InstCombine pass definition -------------------===//
+//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -234,6 +234,7 @@ private:
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
+ Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index b96eb51..a2c545f 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -974,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
@@ -1232,6 +1237,74 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
+ // A * (1 - uitofp i1 C) + B * (uitofp i1 C) -> select C, B, A
+ {
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *M1L, *M1R, *M2L, *M2R;
+ if (match(LHS, m_FMul(m_Value(M1L), m_Value(M1R))) &&
+ match(RHS, m_FMul(m_Value(M2L), m_Value(M2R)))) {
+
+ Value *A, *B, *C1, *C2;
+ if (!match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))))
+ std::swap(M1L, M1R);
+ if (!match(M2R, m_UIToFp(m_Value(C2))))
+ std::swap(M2L, M2R);
+
+ if (match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))) &&
+ match(M2R, m_UIToFp(m_Value(C2))) &&
+ C2->getType()->isIntegerTy(1) &&
+ C1 == C2) {
+ A = M1L;
+ B = M2L;
+ return SelectInst::Create(C1, B, A);
+ }
+
+ std::swap(M1L, M2L);
+ std::swap(M1R, M2R);
+
+ if (!match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))))
+ std::swap(M1L, M1R);
+ if (!match(M2R, m_UIToFp(m_Value(C2))))
+ std::swap(M2L, M2R);
+
+ if (match(M1R, m_FSub(m_FPOne(), m_UIToFp(m_Value(C1)))) &&
+ match(M2R, m_UIToFp(m_Value(C2))) &&
+ C2->getType()->isIntegerTy(1) &&
+ C1 == C2) {
+ A = M1L;
+ B = M2L;
+ return SelectInst::Create(C1, B, A);
+ }
+ }
+ }
+ }
+
+
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);
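A worked example (not in the patch) of the new integer fold in visitAdd: an add whose operand was canonicalized from "x + signbit" into "x ^ signbit" is folded back into a single add with the constants combined. For i32 and C = 42:

    %t = xor i32 %x, -2147483648    ; x ^ 0x80000000
    %r = add i32 %t, 42
    ; becomes
    %r = add i32 %x, -2147483606    ; 42 ^ 0x80000000 = 0x8000002A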
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ec75dd2..496fce6 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -173,14 +173,14 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
// Adding a one to a single bit bit-field should be turned into an XOR
// of the bit. First thing to check is to see if this AND is with a
// single bit constant.
- const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
+ const APInt &AndRHSV = AndRHS->getValue();
// If there is only one bit set.
if (AndRHSV.isPowerOf2()) {
// Ok, at this point, we know that we are masking the result of the
// ADD down to exactly one bit. If the constant we are adding has
// no bits set below this bit, then we can eliminate the ADD.
- const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
+ const APInt& AddRHS = OpRHS->getValue();
// Check to see if any bits below the one bit set in AndRHSV are set.
if ((AddRHS & (AndRHSV-1)) == 0) {
@@ -209,8 +209,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
- AndRHS->getValue() & ShlMask);
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask);
if (CI->getValue() == ShlMask)
// Masking out bits that the shift already masks.
@@ -230,8 +229,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = ConstantInt::get(Op->getContext(),
- AndRHS->getValue() & ShrMask);
+ ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask);
if (CI->getValue() == ShrMask)
// Masking out bits that the shift already masks.
@@ -251,8 +249,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
uint32_t BitWidth = AndRHS->getType()->getBitWidth();
uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = ConstantInt::get(Op->getContext(),
- AndRHS->getValue() & ShrMask);
+ Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask);
if (C == AndRHS) { // Masking out bits shifted in.
// (Val ashr C1) & C2 -> (Val lshr C1) & C2
// Make the argument unsigned.
@@ -279,7 +276,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
if (Inside) {
if (Lo == Hi) // Trivially false.
- return ConstantInt::getFalse(V->getContext());
+ return Builder->getFalse();
// V >= Min && V < Hi --> V < Hi
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
@@ -296,7 +293,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
}
if (Lo == Hi) // Trivially true.
- return ConstantInt::getTrue(V->getContext());
+ return Builder->getTrue();
// V < Min || V >= Hi -> V > Hi-1
Hi = SubOne(cast<ConstantInt>(Hi));
@@ -943,7 +940,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// false.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ConstantInt::getFalse(LHS->getContext());
+ return Builder->getFalse();
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
}
@@ -1380,7 +1377,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// into a byteswap. At least one of the two bytes would not be aligned with
// their ultimate destination.
if (!isPowerOf2_32(ByteMask)) return true;
- unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
+ unsigned InputByteNo = countTrailingZeros(ByteMask);
// 2) The input and ultimate destinations must line up: if byte 3 of an i32
// is demanded, it needs to go into byte 0 of the result. This means that the
@@ -1588,7 +1585,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
}
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
@@ -1640,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
break;
}
@@ -1655,7 +1652,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
break;
}
@@ -1676,7 +1673,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// If either of the constants are nans, then the whole thing returns
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
- return ConstantInt::getTrue(LHS->getContext());
+ return Builder->getTrue();
// Otherwise, no need to compare the two constants, compare the
// rest.
@@ -1779,8 +1776,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateAnd(Or,
- ConstantInt::get(I.getContext(),
- RHS->getValue() | C1->getValue()));
+ Builder->getInt(RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
@@ -1789,8 +1785,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
return BinaryOperator::CreateXor(Or,
- ConstantInt::get(I.getContext(),
- C1->getValue() & ~RHS->getValue()));
+ Builder->getInt(C1->getValue() & ~RHS->getValue()));
}
// Try to fold constant and into select arguments.
@@ -1872,15 +1867,13 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
(V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(A,
- ConstantInt::get(A->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
// Or commutes, try both ways.
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
(V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
return BinaryOperator::CreateAnd(B,
- ConstantInt::get(B->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
@@ -1891,8 +1884,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
(C4->getValue() & ~C2->getValue()) == 0) {
V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
return BinaryOperator::CreateAnd(V2,
- ConstantInt::get(B->getContext(),
- C1->getValue()|C2->getValue()));
+ Builder->getInt(C1->getValue()|C2->getValue()));
}
}
}
@@ -2160,8 +2152,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHS == ConstantExpr::getCast(Opcode,
- ConstantInt::getTrue(I.getContext()),
+ (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(),
Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
return CastInst::Create(Opcode, CI, Op0C->getType());
@@ -2191,8 +2182,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
- Constant *C = ConstantInt::get(I.getContext(),
- RHS->getValue() + Op0CI->getValue());
+ Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2ee1278..361acdd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -677,7 +677,6 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- case Instruction::Shl:
if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
return false;
@@ -701,6 +700,17 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
+ case Instruction::Shl:
+ // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
+ // upper bits we can reduce BitsToClear by the shift amount.
+ if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ return false;
+ uint64_t ShiftAmt = Amt->getZExtValue();
+ BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
+ return true;
+ }
+ return false;
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
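A rough IR sketch (not from the patch) of what the new shl case allows: a zext of a left shift can now be evaluated in the destination type, for example:

    %t = trunc i32 %a to i16
    %s = shl i16 %t, 4
    %z = zext i16 %s to i32
    ; can be evaluated directly in i32, roughly as
    %s.w = shl i32 %a, 4
    %z   = and i32 %s.w, 65535

Here the final mask keeps the low 16 bits as usual; the point of the new case is that BitsToClear shrinks by the shift amount, since the operand's not-yet-cleared high bits are pushed up into the region the mask clears anyway while zeros are shifted into the low end.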
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4c252c0..af8a479 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -402,7 +402,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if (SecondTrueElement != Overdefined) {
// None true -> false.
if (FirstTrueElement == Undefined)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
@@ -422,7 +422,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if (SecondFalseElement != Overdefined) {
// None false -> true.
if (FirstFalseElement == Undefined)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
@@ -712,8 +712,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return ReplaceInstUsesWith(I, // No comparison is needed here.
- ConstantInt::get(Type::getInt1Ty(I.getContext()),
- ICmpInst::isTrueWhenEqual(Cond)));
+ Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -752,11 +751,11 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+4) == X -> false.
if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
// (X+4) != X -> true.
if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
@@ -798,7 +797,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
+ Constant *C = Builder->getInt(CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}
@@ -921,7 +920,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
default: llvm_unreachable("Unhandled icmp opcode!");
case ICmpInst::ICMP_EQ:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, LoBound);
@@ -932,7 +931,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
DivIsSigned, true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (HiOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, LoBound);
@@ -944,16 +943,16 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (LoOverflow == -1) // Low bound is less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
return new ICmpInst(Pred, X, LoBound);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (HiOverflow == -1) // High bound less than input range.
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
@@ -1017,7 +1016,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
- ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
if (Shr->getOpcode() == Instruction::LShr)
Comp = Comp.lshr(ShAmtVal);
else
@@ -1025,8 +1024,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- IsICMP_NE);
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -1039,7 +1037,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
if (Shr->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+ Constant *Mask = Builder->getInt(Val);
Value *And = Builder->CreateAnd(Shr->getOperand(0),
Mask, Shr->getName()+".mask");
@@ -1072,7 +1070,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
APInt NewRHS = RHS->getValue().zext(SrcBits);
NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(), NewRHS));
+ Builder->getInt(NewRHS));
}
}
break;
@@ -1115,8 +1113,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),
- RHSV ^ SignBit));
+ Builder->getInt(RHSV ^ SignBit));
}
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
@@ -1127,8 +1124,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
: ICI.getSignedPredicate();
Pred = ICI.getSwappedPredicate(Pred);
return new ICmpInst(Pred, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),
- RHSV ^ NotSignBit));
+ Builder->getInt(RHSV ^ NotSignBit));
}
}
}
@@ -1218,11 +1214,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// As a special case, check to see if this means that the
// result is always true or false now.
if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::getFalse(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getFalse());
if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::getTrue(ICI.getContext()));
+ return ReplaceInstUsesWith(ICI, Builder->getTrue());
} else {
ICI.setOperand(1, NewCst);
Constant *NewAndCST;
@@ -1344,8 +1338,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ShAmt);
if (Comp != RHS) {// Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst =
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
+ Constant *Cst = Builder->getInt1(IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
@@ -1364,9 +1357,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- Constant *Mask =
- ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
- TypeBits-ShAmtVal));
+ Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits,
+ TypeBits - ShAmtVal));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
@@ -1464,18 +1456,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ICI.isSigned()) {
if (CR.getLower().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ Builder->getInt(CR.getUpper()));
} else if (CR.getUpper().isSignBit()) {
return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getLower()));
+ Builder->getInt(CR.getLower()));
}
} else {
if (CR.getLower().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getUpper()));
+ Builder->getInt(CR.getUpper()));
} else if (CR.getUpper().isMinValue()) {
return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
- ConstantInt::get(ICI.getContext(),CR.getLower()));
+ Builder->getInt(CR.getLower()));
}
}
}
@@ -1555,9 +1547,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- isICMP_NE));
+ return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
}
break;
@@ -1566,9 +1556,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If bits are being compared against that are and'd out, then the
// comparison can never succeed!
if ((RHSV & ~BOC->getValue()) != 0)
- return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- isICMP_NE));
+ return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
// If we have ((X & C) == C), turn it into ((X & C) != 0).
if (RHS == BOC && RHSV.isPowerOf2())
@@ -1619,7 +1607,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
case Intrinsic::bswap:
Worklist.Add(II);
ICI.setOperand(0, II->getArgOperand(0));
- ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
+ ICI.setOperand(1, Builder->getInt(RHSV.byteSwap()));
return &ICI;
case Intrinsic::ctlz:
case Intrinsic::cttz:
@@ -2041,19 +2029,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_ULE:
assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
case ICmpInst::ICMP_SLE:
assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
case ICmpInst::ICMP_UGE:
assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
case ICmpInst::ICMP_SGE:
assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
}
// If this comparison is a normal comparison, it demands all
@@ -2192,7 +2180,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
// (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
if (CI->isMinValue(true))
@@ -2211,7 +2199,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
// (x >u 2147483647) -> (x <s 0) -> true if sign bit set
if (CI->isMaxValue(true))
@@ -2229,7 +2217,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()-1));
+ Builder->getInt(CI->getValue()-1));
}
break;
case ICmpInst::ICMP_SGT:
@@ -2243,7 +2231,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(CI->getContext(), CI->getValue()+1));
+ Builder->getInt(CI->getValue()+1));
}
break;
case ICmpInst::ICMP_SGE:
@@ -2719,8 +2707,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt *C1, *C2;
if (match(B, m_ConstantInt(C1)) &&
match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
- Constant *NC = ConstantInt::get(I.getContext(),
- C1->getValue() ^ C2->getValue());
+ Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
Value *Xor = Builder->CreateXor(C, NC);
return new ICmpInst(I.getPredicate(), A, Xor);
}
@@ -2885,9 +2872,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Pred = ICmpInst::ICMP_NE;
break;
case FCmpInst::FCMP_ORD:
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
case FCmpInst::FCMP_UNO:
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
@@ -2907,8 +2894,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
Pred == ICmpInst::ICMP_SLE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
} else {
// If the RHS value is > UnsignedMax, fold the comparison. This handles
@@ -2919,8 +2906,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT ||
Pred == ICmpInst::ICMP_ULE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
}
@@ -2932,8 +2919,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
Pred == ICmpInst::ICMP_SGE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
} else {
// See if the RHS value is < UnsignedMin.
@@ -2943,8 +2930,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
Pred == ICmpInst::ICMP_UGE)
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
+ return ReplaceInstUsesWith(I, Builder->getFalse());
}
}
@@ -2966,14 +2953,14 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
switch (Pred) {
default: llvm_unreachable("Unexpected integer comparison!");
case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
case ICmpInst::ICMP_ULE:
// (float)int <= 4.4 --> int <= 4
// (float)int <= -4.4 --> false
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
break;
case ICmpInst::ICMP_SLE:
// (float)int <= 4.4 --> int <= 4
@@ -2985,7 +2972,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int < -4.4 --> false
// (float)int < 4.4 --> int <= 4
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getFalse());
Pred = ICmpInst::ICMP_ULE;
break;
case ICmpInst::ICMP_SLT:
@@ -2998,7 +2985,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int > 4.4 --> int > 4
// (float)int > -4.4 --> true
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
break;
case ICmpInst::ICMP_SGT:
// (float)int > 4.4 --> int > 4
@@ -3010,7 +2997,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
if (RHS.isNegative())
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, Builder->getTrue());
Pred = ICmpInst::ICMP_UGT;
break;
case ICmpInst::ICMP_SGE:
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index df73906..e36b762 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -95,6 +95,25 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
return MulExt.slt(Min) || MulExt.sgt(Max);
}
+/// \brief A helper routine of InstCombiner::visitMul().
+///
+/// If C is a vector of known powers of 2, then this function returns
+/// a new vector obtained from C replacing each element with its logBase2.
+/// Return a null pointer otherwise.
+static Constant *getLogBase2Vector(ConstantDataVector *CV) {
+ const APInt *IVal;
+ SmallVector<Constant *, 4> Elts;
+
+ for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
+ Constant *Elt = CV->getElementAsConstant(I);
+ if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
+ return 0;
+ Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
+ }
+
+ return ConstantVector::get(Elts);
+}
+
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -108,24 +127,37 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
- // ((X << C1)*C2) == (X * (C2 << C1))
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
- if (SI->getOpcode() == Instruction::Shl)
- if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::CreateMul(SI->getOperand(0),
- ConstantExpr::getShl(CI, ShOp));
-
- const APInt &Val = CI->getValue();
- if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
- BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
- if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
- if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
- return Shl;
+ // Also allow combining multiply instructions on vectors.
+ {
+ Value *NewOp;
+ Constant *C1, *C2;
+ const APInt *IVal;
+ if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_Constant(C2)),
+ m_Constant(C1))) &&
+ match(C1, m_APInt(IVal)))
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2));
+
+ if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
+ Constant *NewCst = 0;
+ if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
+ // Replace X*(2^C) with X << C, where C is either a scalar or a splat.
+ NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
+ else if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(C1))
+ // Replace X*(2^C) with X << C, where C is a vector of known
+ // constant powers of 2.
+ NewCst = getLogBase2Vector(CV);
+
+ if (NewCst) {
+ BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
+ }
}
+ }
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -584,8 +616,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
*I = SI->getOperand(NonNullOperand);
Worklist.Add(BBI);
} else if (*I == SelectCond) {
- *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
- ConstantInt::getFalse(BBI->getContext());
+ *I = Builder->getInt1(NonNullOperand == 1);
Worklist.Add(BBI);
}
}
@@ -817,7 +848,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
/// FP value and:
/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
-/// If the convertion was successful, the simplified expression "X * 1/C" is
+/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
///
static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
@@ -998,37 +1029,19 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
- // X urem C^2 -> X and C-1
- { const APInt *C;
- if (match(Op1, m_Power2(C)))
- return BinaryOperator::CreateAnd(Op0,
- ConstantInt::get(I.getType(), *C-1));
- }
-
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
- Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(Op1, N1);
- return BinaryOperator::CreateAnd(Op0, Add);
- }
-
- // urem X, (select Cond, 2^C1, 2^C2) -->
- // select Cond, (and X, C1-1), (and X, C2-1)
- // when C1&C2 are powers of two.
- { Value *Cond; const APInt *C1, *C2;
- if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
- Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
- Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
- return SelectInst::Create(Cond, TrueAnd, FalseAnd);
- }
- }
-
// (zext A) urem (zext B) --> zext (A urem B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
I.getType());
+ // X urem Y -> X and Y-1, where Y is a power of 2,
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1);
+ return BinaryOperator::CreateAnd(Op0, Add);
+ }
+
return 0;
}
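Two small IR sketches (not from the patch) of the generalizations in this file. The mul-by-power-of-two rewrite now also fires on vector constants via getLogBase2Vector:

    %m = mul <4 x i32> %x, <i32 2, i32 8, i32 1, i32 16>
    ; becomes
    %m = shl <4 x i32> %x, <i32 1, i32 3, i32 0, i32 4>

and the urem fold now keys off isKnownToBeAPowerOfTwo instead of a hard-coded constant or shl-of-power-of-two pattern, so for example:

    %y = shl i32 1, %n
    %r = urem i32 %x, %y
    ; becomes
    %y.m1 = add i32 %y, -1
    %r    = and i32 %x, %y.m1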
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2defe63..59502fb 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -974,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -984,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
- // Form a shufflevector instruction.
- SmallVector<Constant *, 8> Mask(VWidth);
- Type *Int32Ty = Type::getInt32Ty(CV->getContext());
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elem = cast<Constant>(CV->getOperand(i));
- if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
- Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
- else if (isa<UndefValue>(Elem))
- Mask[i] = UndefValue::get(Int32Ty);
- else
- return 0;
- }
- Constant *MaskVal = ConstantVector::get(Mask);
- Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
- return ReplaceInstUsesWith(SI, V);
- }
-
if (isa<ConstantAggregateZero>(CondVal)) {
return ReplaceInstUsesWith(SI, FalseVal);
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8add1ea..a7bfe09 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -754,7 +754,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
- KnownZero |= LHSKnownZero;
+ KnownZero.setBit(KnownZero.getBitWidth() - 1);
}
break;
case Instruction::URem: {
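A sketch of the case the narrower update targets (assuming from the surrounding context that this is the srem handling): the sign of a signed remainder follows its dividend, so a non-negative dividend only justifies clearing the sign bit of the result, not copying over all of its known-zero bits:

    %r = srem i32 %x, 7
    ; if %x is known non-negative, %r is too (its sign bit is known zero),
    ; but %r's low bits are not known zero just because %x's are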
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index de8a3ac..d43093d 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -125,17 +125,15 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// and that it is a binary operation which is cheap to scalarize.
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) ||
- !CheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
return NULL;
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
- PHINode * scalarPHI = cast<PHINode>(
- InsertNewInstWith(PHINode::Create(EI.getType(),
- PN->getNumIncomingValues(), ""), *PN));
+ PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
+ PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
// Scalarize each PHI operand.
- for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Value *PHIInVal = PN->getIncomingValue(i);
BasicBlock *inBB = PN->getIncomingBlock(i);
Value *Elt = EI.getIndexOperand();
@@ -145,17 +143,17 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// scalar PHI and the second operand is extracted from the other
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
- unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
- Value *Op = Builder->CreateExtractElement(
- B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
+ Value *Op = InsertNewInstWith(
+ ExtractElementInst::Create(B0->getOperand(opId), Elt,
+ B0->getOperand(opId)->getName() + ".Elt"),
+ *B0);
Value *newPHIUser = InsertNewInstWith(
- BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
- *B0);
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
- Instruction *newEI =
- ExtractElementInst::Create(PHIInVal, Elt, "");
+ Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
// Insert the new instruction into the predecessor basic block.
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
@@ -222,9 +220,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If there's a vector PHI feeding a scalar use through this extractelement
// instruction, try to scalarize the PHI.
if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
- Instruction *scalarPHI = scalarizePHI(EI, PN);
- if (scalarPHI)
- return (scalarPHI);
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+ return scalarPHI;
}
}
@@ -496,6 +494,252 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return 0;
}
+/// Return true if we can evaluate the specified expression tree if the vector
+/// elements were shuffled in a different order.
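+/// For example, "insertelement <4 x i32> undef, i32 %s, i32 0" can be
+/// evaluated with Mask = <3,2,1,0>: the insert simply moves to index 3.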
+static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
+ unsigned Depth = 5) {
+ // We can always reorder the elements of a constant.
+ if (isa<Constant>(V))
+ return true;
+
+ // We won't reorder vector arguments. No IPO here.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Two users may expect different orders of the elements. Don't try it.
+ if (!I->hasOneUse())
+ return false;
+
+ if (Depth == 0) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::GetElementPtr: {
+ for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+ return false;
+ }
+ return true;
+ }
+ case Instruction::InsertElement: {
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
+ if (!CI) return false;
+ int ElementNumber = CI->getLimitedValue();
+
+ // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
+ // can't put an element into multiple indices.
+ bool SeenOnce = false;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == ElementNumber) {
+ if (SeenOnce)
+ return false;
+ SeenOnce = true;
+ }
+ }
+ return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
+ }
+ }
+ return false;
+}
+
+/// Rebuild a new instruction just like 'I' but with the new operands given.
+/// In the event of a type mismatch, the types of the new operands are taken
+/// to be the correct ones.
+static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+ // We don't want to use the IRBuilder here because we want the replacement
+ // instructions to appear next to 'I', not the builder's insertion point.
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ assert(NewOps.size() == 2 && "binary operator with #ops != 2");
+ BinaryOperator *New =
+ BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
+ NewOps[0], NewOps[1], "", BO);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
+ New->setHasNoSignedWrap(BO->hasNoSignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(BO)) {
+ New->setIsExact(BO->isExact());
+ }
+ return New;
+ }
+ case Instruction::ICmp:
+ assert(NewOps.size() == 2 && "icmp with #ops != 2");
+ return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::FCmp:
+ assert(NewOps.size() == 2 && "fcmp with #ops != 2");
+ return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
+ NewOps[0], NewOps[1]);
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt: {
+ // It's possible that the mask has a different number of elements from
+ // the original cast. We recompute the destination type to match the mask.
+ Type *DestTy =
+ VectorType::get(I->getType()->getScalarType(),
+ NewOps[0]->getType()->getVectorNumElements());
+ assert(NewOps.size() == 1 && "cast with #ops != 1");
+ return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
+ "", I);
+ }
+ case Instruction::GetElementPtr: {
+ Value *Ptr = NewOps[0];
+ ArrayRef<Value*> Idx = NewOps.slice(1);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
+ return GEP;
+ }
+ }
+ llvm_unreachable("failed to rebuild vector instructions");
+}
+
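+/// Rebuild the expression tree rooted at V so that element i of the result is
+/// element Mask[i] of the original vector (or undef where Mask[i] is -1).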
+Value *
+InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+ // Mask.size() does not need to be equal to the number of vector elements.
+
+ assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
+ if (isa<UndefValue>(V)) {
+ return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (isa<ConstantAggregateZero>(V)) {
+ return ConstantAggregateZero::get(
+ VectorType::get(V->getType()->getScalarType(),
+ Mask.size()));
+ }
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ SmallVector<Constant *, 16> MaskValues;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == -1)
+ MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
+ else
+ MaskValues.push_back(Builder->getInt32(Mask[i]));
+ }
+ return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
+ ConstantVector::get(MaskValues));
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::Select:
+ case Instruction::GetElementPtr: {
+ SmallVector<Value*, 8> NewOps;
+ bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
+ for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ NewOps.push_back(V);
+ NeedsRebuild |= (V != I->getOperand(i));
+ }
+ if (NeedsRebuild) {
+ return BuildNew(I, NewOps);
+ }
+ return I;
+ }
+ case Instruction::InsertElement: {
+ int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
+
+ // The insertelement was inserting at Element. Figure out which element
+ // that becomes after shuffling. The answer is guaranteed to be unique
+ // by CanEvaluateShuffled.
+ bool Found = false;
+ int Index = 0;
+ for (int e = Mask.size(); Index != e; ++Index) {
+ if (Mask[Index] == Element) {
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found)
+ return UndefValue::get(I->getType());
+ Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return InsertElementInst::Create(V, I->getOperand(1),
+ Builder->getInt32(Index), "", I);
+ }
+ }
+ llvm_unreachable("failed to reorder elements of vector instruction!");
+}
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
@@ -527,9 +771,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHS == RHS || isa<UndefValue>(LHS)) {
if (isa<UndefValue>(LHS) && LHS == RHS) {
// shuffle(undef,undef,mask) -> undef.
- Value* result = (VWidth == LHSWidth)
+ Value *Result = (VWidth == LHSWidth)
? LHS : UndefValue::get(SVI.getType());
- return ReplaceInstUsesWith(SVI, result);
+ return ReplaceInstUsesWith(SVI, Result);
}
// Remap any references to RHS to use LHS.
@@ -576,6 +820,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
}
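+  // If the RHS is unused and every node of the LHS expression tree tolerates
+  // having its lanes reordered, fold the shuffle into that tree. For example,
+  // shuffling "insertelement <4 x i32> undef, i32 %s, i32 0" with mask
+  // <3,2,1,0> becomes a plain insert of %s at index 3.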
+ if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
+ Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
+ return ReplaceInstUsesWith(SVI, V);
+ }
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3..ec10751 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
Module *M = II->getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
- ArrayRef<Value *>(), "", II->getParent());
+ None, "", II->getParent());
}
return EraseInstFromFunction(MI);
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 623c470..22851b4 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -39,9 +39,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -56,6 +56,7 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
+static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
@@ -208,6 +209,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
+ bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips ||
+ TargetTriple.getArch() == llvm::Triple::mipsel;
ShadowMapping Mapping;
@@ -217,7 +220,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
- (LongSize == 32 ? kDefaultShadowOffset32 :
+ (LongSize == 32 ?
+ (IsMIPS32 ? kMIPS32_ShadowOffset32 : kDefaultShadowOffset32) :
IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
assert(LongSize == 64);
@@ -520,7 +524,7 @@ ModulePass *llvm::createAddressSanitizerModulePass(
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
- size_t Res = CountTrailingZeros_32(TypeSize / 8);
+ size_t Res = countTrailingZeros(TypeSize / 8);
assert(Res < kNumberOfAccessSizes);
return Res;
}
@@ -1270,6 +1274,10 @@ void FunctionStackPoisoner::poisonRedZones(
RedzoneSize(),
1ULL << Mapping.Scale,
kAsanStackPartialRedzoneMagic);
+ Poison =
+ ASan.TD->isLittleEndian()
+ ? support::endian::byte_swap<uint32_t, support::little>(Poison)
+ : support::endian::byte_swap<uint32_t, support::big>(Poison);
}
Value *PartialPoison = ConstantInt::get(RZTy, Poison);
IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 1c9e053..aa265a4 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
BlackList.cpp
BoundsChecking.cpp
+ DebugIR.cpp
EdgeProfiling.cpp
GCOVProfiling.cpp
MemorySanitizer.cpp
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
new file mode 100644
index 0000000..020804f
--- /dev/null
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -0,0 +1,310 @@
+//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A Module transform pass that emits a succinct version of the IR and replaces
+// the source file metadata to allow debuggers to step through the IR.
+//
+// The location where the IR file is emitted is the same as the directory
+// operand of the !llvm.dbg.cu metadata node present in the input module. The
+// file name is constructed from the original file name by stripping the
+// extension and replacing it with "-debug-ll" or the Postfix string specified
+// at construction.
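+// For example, with the default postfix an input named "foo.cpp" is emitted
+// as "foo-debug-ll".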
+//
+// FIXME: instead of replacing debug metadata, additional metadata should be
+// used to point capable debuggers to the IR file without destroying the
+// mapping to the original source file.
+//
+// FIXME: this pass should not depend on the existence of debug metadata in
+// the module as it does now. Instead, it should use DIBuilder to create the
+// required metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string>
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+namespace {
+
+/// Builds a map of Value* to the line numbers on which the Value appears in a
+/// textual representation of the IR by plugging into the AssemblyWriter,
+/// masquerading as an AssemblyAnnotationWriter.
+class ValueToLineMap : public AssemblyAnnotationWriter {
+ ValueMap<const Value *, unsigned int> Lines;
+ typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter;
+
+public:
+
+ /// Prints Module to a null buffer in order to build the map of Value pointers
+ /// to line numbers.
+ ValueToLineMap(Module *M) {
+ raw_null_ostream ThrowAway;
+ M->print(ThrowAway, this);
+ }
+
+ // This function is called after an Instruction, GlobalValue, or GlobalAlias
+ // is printed.
+ void printInfoComment(const Value &V, formatted_raw_ostream &Out) {
+ Out.flush();
+ Lines.insert(std::make_pair(&V, Out.getLine() + 1));
+ }
+
+ /// If V appears on a line in the textual IR representation, sets Line to the
+ /// line number and returns true, otherwise returns false.
+ bool getLine(const Value *V, unsigned int &Line) const {
+ LineIter i = Lines.find(V);
+ if (i != Lines.end()) {
+ Line = i->second;
+ return true;
+ }
+ return false;
+ }
+};
+
+/// Removes debug intrinsics like llvm.dbg.declare and llvm.dbg.value.
+class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> {
+ void remove(Instruction &I) { I.eraseFromParent(); }
+
+public:
+ void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); }
+ void visitDbgValueInst(DbgValueInst &I) { remove(I); }
+ void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); }
+};
+
+/// Removes debug metadata (!dbg) nodes from all instructions as well as
+/// metadata named "llvm.dbg.cu" in the Module.
+class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> {
+public:
+ void visitInstruction(Instruction &I) {
+ if (I.getMetadata(LLVMContext::MD_dbg))
+ I.setMetadata(LLVMContext::MD_dbg, 0);
+ }
+
+ void run(Module *M) {
+ // Remove debug metadata attached to instructions
+ visit(M);
+
+ // Remove CU named metadata (and all children nodes)
+ NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu");
+ M->eraseNamedMetadata(Node);
+ }
+};
+
+/// Replaces line number metadata attached to Instruction nodes with new line
+/// numbers provided by the ValueToLineMap.
+class LineNumberReplacer : public InstVisitor<LineNumberReplacer> {
+ /// Table of line numbers
+ const ValueToLineMap &LineTable;
+
+ /// Table of cloned values
+ const ValueToValueMapTy &VMap;
+
+ /// Directory of debug metadata
+ const DebugInfoFinder &Finder;
+
+public:
+ LineNumberReplacer(const ValueToLineMap &VLM, const DebugInfoFinder &Finder,
+ const ValueToValueMapTy &VMap)
+ : LineTable(VLM), VMap(VMap), Finder(Finder) {}
+
+ void visitInstruction(Instruction &I) {
+ DebugLoc Loc(I.getDebugLoc());
+
+ unsigned Col = 0; // FIXME: support columns
+ unsigned Line;
+ if (!LineTable.getLine(VMap.lookup(&I), Line))
+      // Instruction has no line; it may have been removed (in the module that
+      // will be passed to the debugger), so there is nothing to do here.
+ return;
+
+ DebugLoc NewLoc;
+ if (!Loc.isUnknown())
+ // I had a previous debug location: re-use the DebugLoc
+ NewLoc = DebugLoc::get(Line, Col, Loc.getScope(I.getContext()),
+ Loc.getInlinedAt(I.getContext()));
+ else if (MDNode *scope = findFunctionMD(I.getParent()->getParent()))
+ // I had no previous debug location, but M has some debug information
+ NewLoc =
+ DebugLoc::get(Line, Col, scope, /*FIXME: inlined instructions*/ 0);
+ else
+ // Neither I nor M has any debug information -- nothing to do here.
+ // FIXME: support debugging of undecorated IR (generated by clang without
+ // the -g option)
+ return;
+
+ addDebugLocation(const_cast<Instruction &>(I), NewLoc);
+ }
+
+private:
+
+ /// Returns the MDNode that corresponds with F
+ MDNode *findFunctionMD(const Function *F) {
+ for (DebugInfoFinder::iterator i = Finder.subprogram_begin(),
+ e = Finder.subprogram_end();
+ i != e; ++i) {
+ DISubprogram S(*i);
+ if (S.getFunction() == F)
+ return *i;
+ }
+ // cannot find F -- likely means there is no debug information
+ return 0;
+ }
+
+ void addDebugLocation(Instruction &I, DebugLoc Loc) {
+ MDNode *MD = Loc.getAsMDNode(I.getContext());
+ I.setMetadata(LLVMContext::MD_dbg, MD);
+ }
+};
+
+class DebugIR : public ModulePass {
+ std::string Postfix;
+ std::string Filename;
+
+ /// Flags to control the verbosity of the generated IR file
+ bool hideDebugIntrinsics;
+ bool hideDebugMetadata;
+
+public:
+ static char ID;
+
+ const char *getPassName() const { return "DebugIR"; }
+
+ // FIXME: figure out if we are compiling something that already exists on disk
+ // in text IR form, in which case we can omit outputting a new IR file, or if
+ // we're building something from memory where we actually need to emit a new
+ // IR file for the debugger.
+
+ /// Output a file with the same base name as the original, but with the
+ /// postfix "-debug-ll" appended.
+ DebugIR()
+ : ModulePass(ID), Postfix("-debug-ll"), hideDebugIntrinsics(true),
+ hideDebugMetadata(true) {}
+
+ /// Customize the postfix string used to replace the extension of the
+ /// original filename that appears in the !llvm.dbg.cu metadata node.
+ DebugIR(StringRef postfix, bool hideDebugIntrinsics, bool hideDebugMetadata)
+ : ModulePass(ID), Postfix(postfix),
+ hideDebugIntrinsics(hideDebugIntrinsics),
+ hideDebugMetadata(hideDebugMetadata) {}
+
+private:
+ // Modify the filename embedded in the Compilation-Unit debug information of M
+ bool replaceFilename(Module &M, const DebugInfoFinder &Finder) {
+ bool changed = false;
+
+ // Sanity check -- if llvm.dbg.cu node exists, the DebugInfoFinder
+ // better have found at least one CU!
+ if (M.getNamedMetadata("llvm.dbg.cu"))
+ assert(Finder.compile_unit_count() > 0 &&
+ "Found no compile units but llvm.dbg.cu node exists");
+
+ for (DebugInfoFinder::iterator i = Finder.compile_unit_begin(),
+ e = Finder.compile_unit_end();
+ i != e; ++i) {
+ DICompileUnit CU(*i);
+ Filename = CU.getFilename();
+
+ // Replace extension with postfix
+ size_t dot = Filename.find_last_of(".");
+ if (dot != std::string::npos)
+ Filename.erase(dot);
+ Filename += Postfix;
+
+ CU.setFilename(Filename, M.getContext());
+ changed = true;
+ }
+ return changed;
+ }
+
+ /// Replace existing line number metadata with line numbers that correspond
+ /// with the IR file that is seen by the debugger.
+ void addLineNumberMetadata(Module *M, const ValueToLineMap &VLM,
+ const ValueToValueMapTy &VMap,
+ const DebugInfoFinder &Finder) {
+ LineNumberReplacer Replacer(VLM, Finder, VMap);
+ Replacer.visit(M);
+ }
+
+ void writeDebugBitcode(Module *M) {
+ std::string error;
+ tool_output_file OutFile(Filename.c_str(), error);
+ OutFile.keep();
+ formatted_raw_ostream OS;
+ OS.setStream(OutFile.os());
+ M->print(OS, 0);
+ }
+
+ void removeDebugIntrinsics(Module *M) {
+ DebugIntrinsicsRemover Remover;
+ Remover.visit(M);
+ }
+
+ void removeDebugMetadata(Module *M) {
+ DebugMetadataRemover Remover;
+ Remover.run(M);
+ }
+
+ void updateAndWriteDebugIRFile(Module *M, const DebugInfoFinder &Finder) {
+ // The module we output in text form for a debugger to open is stripped of
+    // 'extras' like debug intrinsics that end up in DWARF anyway and just
+ // clutter the debug experience.
+
+ ValueToValueMapTy VMap;
+ Module *DebuggerM = CloneModule(M, VMap);
+
+ if (hideDebugIntrinsics)
+ removeDebugIntrinsics(DebuggerM);
+
+ if (hideDebugMetadata)
+ removeDebugMetadata(DebuggerM);
+
+ // FIXME: remove all debug metadata from M once we support generating DWARF
+ // subprogram attributes.
+
+ ValueToLineMap LineTable(DebuggerM);
+ addLineNumberMetadata(M, LineTable, VMap, Finder);
+ writeDebugBitcode(DebuggerM);
+ }
+
+ bool runOnModule(Module &M) {
+ // Stores existing debug info needed when creating new line number entries.
+ DebugInfoFinder Finder;
+ Finder.processModule(M);
+
+ bool changed = replaceFilename(M, Finder);
+ if (changed)
+ updateAndWriteDebugIRFile(&M, Finder);
+ return changed;
+ }
+};
+
+} // anonymous namespace
+
+char DebugIR::ID = 0;
+INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false)
+
+ModulePass *llvm::createDebugIRPass(StringRef FilenamePostfix,
+ bool hideDebugIntrinsics,
+ bool hideDebugMetadata) {
+ return new DebugIR(FilenamePostfix, hideDebugIntrinsics, hideDebugMetadata);
+}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 2edd151..3ce9cf6 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -34,7 +34,7 @@
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/PathV2.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <string>
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4e75904..a3a688d 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -74,6 +74,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -228,7 +229,7 @@ class MemorySanitizer : public FunctionPass {
MDNode *ColdCallWeights;
/// \brief Branch weights for origin store.
MDNode *OriginStoreWeights;
- /// \bried Path to blacklist file.
+ /// \brief Path to blacklist file.
SmallString<64> BlacklistFile;
/// \brief The blacklist.
OwningPtr<BlackList> BL;
@@ -299,30 +300,30 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
RetvalTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 8), false,
GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
RetvalOriginTLS = new GlobalVariable(
M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
- "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
ParamTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
ParamOriginTLS = new GlobalVariable(
M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
- 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
VAArgTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
VAArgOverflowSizeTLS = new GlobalVariable(
M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
"__msan_va_arg_overflow_size_tls", 0,
- GlobalVariable::GeneralDynamicTLSModel);
+ GlobalVariable::InitialExecTLSModel);
OriginTLS = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+ "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
// We insert an empty inline asm after __msan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -365,11 +366,13 @@ bool MemorySanitizer::doInitialization(Module &M) {
appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
"__msan_init", IRB.getVoidTy(), NULL)), 0);
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ if (TrackOrigins)
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+ if (ClKeepGoing)
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(ClKeepGoing), "__msan_keep_going");
return true;
}
@@ -768,14 +771,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (AI->hasByValAttr()) {
// ByVal pointer itself has clean shadow. We copy the actual
// argument shadow to the underlying memory.
+ // Figure out maximal valid memcpy alignment.
+ unsigned ArgAlign = AI->getParamAlignment();
+ if (ArgAlign == 0) {
+ Type *EltType = A->getType()->getPointerElementType();
+ ArgAlign = MS.TD->getABITypeAlignment(EltType);
+ }
+ unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
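+      // The argument shadow lives in the TLS parameter area, which is only
+      // kShadowTLSAlignment-aligned, so the copy may not claim more alignment
+      // than min(ArgAlign, kShadowTLSAlignment).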
Value *Cpy = EntryIRB.CreateMemCpy(
- getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
- Base, Size, AI->getParamAlignment());
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size,
+ CopyAlign);
DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
(void)Cpy;
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr = EntryIRB.CreateLoad(Base);
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
}
DEBUG(dbgs() << " ARG: " << *AI << " ==> " <<
**ShadowPtr << "\n");
@@ -784,7 +794,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
}
}
- ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ ArgOffset += DataLayout::RoundUpAlignment(Size, kShadowTLSAlignment);
}
assert(*ShadowPtr && "Could not find shadow for an argument");
return *ShadowPtr;
@@ -1963,9 +1973,29 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
};
-VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+/// \brief A no-op implementation of VarArgHelper.
+struct VarArgNoOpHelper : public VarArgHelper {
+ VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {}
+
+ void visitVAStartInst(VAStartInst &I) {}
+
+ void visitVACopyInst(VACopyInst &I) {}
+
+ void finalizeInstrumentation() {}
+};
+
+VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
MemorySanitizerVisitor &Visitor) {
- return new VarArgAMD64Helper(Func, Msan, Visitor);
+ // VarArg handling is only implemented on AMD64. False positives are possible
+ // on other platforms.
+ llvm::Triple TargetTriple(Func.getParent()->getTargetTriple());
+ if (TargetTriple.getArch() == llvm::Triple::x86_64)
+ return new VarArgAMD64Helper(Func, Msan, Visitor);
+ else
+ return new VarArgNoOpHelper(Func, Msan, Visitor);
}
} // namespace
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 299060a..318fa3f 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -579,7 +579,7 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
// Ignore all unusual sizes.
return -1;
}
- size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+ size_t Idx = countTrailingZeros(TypeSize / 8);
assert(Idx < kNumberOfAccessSizes);
return Idx;
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index e3d51d5..fc5cf4e 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -30,6 +30,7 @@
#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -107,6 +108,12 @@ namespace {
return std::make_pair(Vector.begin() + Pair.first->second, false);
}
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
const_iterator find(const KeyT &Key) const {
typename MapTy::const_iterator It = Map.find(Key);
if (It == Map.end()) return Vector.end();
@@ -253,6 +260,40 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
return false;
}
+/// This is a wrapper around GetUnderlyingObjCPtr along the lines of
+/// GetUnderlyingObjects except that it returns early when it sees the first
+/// alloca.
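+/// Used to detect pointers that may have multiple owners, e.g. a retained
+/// pointer that has also been stored into a stack slot.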
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(V);
+ do {
+ const Value *P = Worklist.pop_back_val();
+ P = GetUnderlyingObjCPtr(P);
+
+ if (isa<AllocaInst>(P))
+ return true;
+
+ if (!Visited.insert(P))
+ continue;
+
+ if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
+ } while (!Worklist.empty());
+
+ return false;
+}
+
+
/// @}
///
/// \defgroup ARCOpt ARC Optimization.
@@ -300,18 +341,18 @@ STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
- "retain+autoreleaes eliminated");
+ "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt,
- "Number of retains before optimization.");
+ "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt,
- "Number of releases before optimization.");
-#ifndef NDEBUG
+ "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt,
- "Number of retains after optimization.");
+ "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt,
- "Number of releases after optimization.");
+ "Number of releases after optimization");
#endif
namespace {
@@ -414,8 +455,13 @@ namespace {
/// sequence.
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+ /// If this is true, we cannot perform code motion but can still remove
+ /// retain/release pairs.
+ bool CFGHazardAfflicted;
+
RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0),
+ CFGHazardAfflicted(false) {}
void clear();
@@ -431,6 +477,7 @@ void RRInfo::clear() {
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
+ CFGHazardAfflicted = false;
}
namespace {
@@ -457,10 +504,12 @@ namespace {
Seq(S_None) {}
void SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Setting Known Positive.\n");
KnownPositiveRefCount = true;
}
void ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << "Clearing Known Positive.\n");
KnownPositiveRefCount = false;
}
@@ -516,6 +565,7 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+ RRI.CFGHazardAfflicted |= Other.RRI.CFGHazardAfflicted;
// Merge the insert point sets. If there are any differences,
// that makes this a partial merge.
@@ -587,14 +637,26 @@ namespace {
/// definition.
void SetAsExit() { BottomUpPathCount = 1; }
+ /// Attempt to find the PtrState object describing the top down state for
+ /// pointer Arg. Return a new initialized PtrState describing the top down
+ /// state for Arg if we do not find one.
PtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg. Return a new initialized PtrState describing the bottom up
+ /// state for Arg if we do not find one.
PtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
+ /// Attempt to find the PtrState object describing the bottom up state for
+ /// pointer Arg.
+ ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp.find(Arg);
+ }
+
void clearBottomUpPointers() {
PerPtrBottomUp.clear();
}
@@ -608,13 +670,20 @@ namespace {
void MergePred(const BBState &Other);
void MergeSucc(const BBState &Other);
- /// Return the number of possible unique paths from an entry to an exit
+ /// Compute the number of possible unique paths from an entry to an exit
/// which pass through this block. This is only valid after both the
/// top-down and bottom-up traversals are complete.
- unsigned GetAllPathCount() const {
+ ///
+  /// Returns true if an overflow occurred, false otherwise.
+ bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
assert(TopDownPathCount != 0);
assert(BottomUpPathCount != 0);
- return TopDownPathCount * BottomUpPathCount;
+ unsigned long long Product =
+ (unsigned long long)TopDownPathCount*BottomUpPathCount;
+ PathCount = Product;
+    // Overflow occurred if any of the upper bits of Product are set.
+ return Product >> 32;
}
// Specialized CFG utilities.
@@ -992,6 +1061,9 @@ namespace {
bool Changed;
ProvenanceAnalysis PA;
+ // This is used to track if a pointer is stored into an alloca.
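+  // Pointers in this set require KnownSafe in both directions before their
+  // retain/release pairs may be removed.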
+ DenseSet<const Value *> MultiOwnersSet;
+
/// A flag indicating whether this optimization pass should run.
bool Run;
@@ -1440,11 +1512,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_RetainBlock:
// If we strength reduce an objc_retainBlock to an objc_retain, continue
// onto the objc_retain peephole optimizations. Otherwise break.
- if (!OptimizeRetainBlockCall(F, Inst, Class))
- break;
- // FALLTHROUGH
- case IC_Retain:
- ++NumRetainsBeforeOpt;
+ OptimizeRetainBlockCall(F, Inst, Class);
break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
@@ -1453,9 +1521,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
- case IC_Release:
- ++NumReleasesBeforeOpt;
- break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1472,8 +1537,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *NewCall =
CallInst::Create(getReleaseCallee(F.getParent()),
Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>()));
+ NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
"since x is otherwise unused.\nOld: " << *Call << "\nNew: "
@@ -1640,6 +1704,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
PtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
+ bool &NotAllSeqEqualButKnownSafe,
bool &ShouldContinue) {
switch (SuccSSeq) {
case S_CanRelease: {
@@ -1647,6 +1712,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
S.ClearSequenceProgress();
break;
}
+ S.RRI.CFGHazardAfflicted = true;
ShouldContinue = true;
break;
}
@@ -1658,6 +1724,8 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
case S_MovableRelease:
if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
+ else
+ NotAllSeqEqualButKnownSafe = true;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
@@ -1673,7 +1741,8 @@ static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
PtrState &S,
bool &SomeSuccHasSame,
- bool &AllSuccsHaveSame) {
+ bool &AllSuccsHaveSame,
+ bool &NotAllSeqEqualButKnownSafe) {
switch (SuccSSeq) {
case S_CanRelease:
SomeSuccHasSame = true;
@@ -1684,6 +1753,8 @@ static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
case S_Use:
if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
+ else
+ NotAllSeqEqualButKnownSafe = true;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
@@ -1719,6 +1790,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
+ bool NotAllSeqEqualButKnownSafe = false;
succ_const_iterator SI(TI), SE(TI, false);
@@ -1750,17 +1822,17 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
switch(S.GetSeq()) {
case S_Use: {
bool ShouldContinue = false;
- CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
- SomeSuccHasSame, AllSuccsHaveSame,
+ CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
+ AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
ShouldContinue);
if (ShouldContinue)
continue;
break;
}
case S_CanRelease: {
- CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
- S, SomeSuccHasSame,
- AllSuccsHaveSame);
+ CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+ SomeSuccHasSame, AllSuccsHaveSame,
+ NotAllSeqEqualButKnownSafe);
break;
}
case S_Retain:
@@ -1775,8 +1847,15 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
+ if (SomeSuccHasSame && !AllSuccsHaveSame) {
S.ClearSequenceProgress();
+ } else if (NotAllSeqEqualButKnownSafe) {
+    // If we would have cleared the state were it not for the fact that we are
+    // known safe, stop code motion. Whether or not it is safe to remove RR
+    // pairs via KnownSafe is orthogonal to whether we are allowed to perform
+    // code motion.
+ S.RRI.CFGHazardAfflicted = true;
+ }
}
}
@@ -1867,6 +1946,28 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case IC_None:
// These are irrelevant.
return NestingDetected;
+ case IC_User:
+      // If we have a store into an alloca of a pointer we are tracking, the
+      // pointer has multiple owners, implying that we must be more
+      // conservative.
+      //
+      // This comes up in the context of a pointer being ``KnownSafe''. In the
+      // presence of a block being initialized, the frontend will emit the
+      // objc_retain on the original pointer and the release on the pointer
+      // loaded from the alloca. The optimizer will, through the provenance
+      // analysis, realize that the two are related, but since we only require
+      // KnownSafe in one direction, will match the inner retain on the
+      // original pointer with the guard release on the original pointer. This
+      // is fixed by ensuring that in the presence of allocas we only
+      // unconditionally remove pointers if both our retain and our release
+      // are KnownSafe.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
+ BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
+ StripPointerCastsAndObjCCalls(SI->getValueOperand()));
+ if (I != MyStates.bottom_up_ptr_end())
+ MultiOwnersSet.insert(I->first);
+ }
+ }
+ break;
default:
break;
}
@@ -2413,8 +2514,11 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
bool KnownSafe,
bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
- // is already incremented, we can similarly ignore possible decrements.
+ // is already incremented, we can similarly ignore possible decrements unless
+ // we are dealing with a retainable object with multiple provenance sources.
bool KnownSafeTD = true, KnownSafeBU = true;
+ bool MultipleOwners = false;
+ bool CFGHazardAfflicted = false;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2433,6 +2537,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
+ MultipleOwners =
+ MultipleOwners || MultiOwnersSet.count(GetObjCArg(NewRetain));
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2444,8 +2550,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
const RRInfo &NewRetainReleaseRRI = Jt->second;
assert(NewRetainReleaseRRI.Calls.count(NewRetain));
if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
- OldDelta -=
- BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // If we overflow when we compute the path count, don't remove/move
+ // anything.
+ const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
+ unsigned PathCount;
+ if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ OldDelta -= PathCount;
// Merge the ReleaseMetadata and IsTailCallRelease values.
if (FirstRelease) {
@@ -2470,8 +2582,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
RE = NewRetainReleaseRRI.ReverseInsertPts.end();
RI != RE; ++RI) {
Instruction *RIP = *RI;
- if (ReleasesToMove.ReverseInsertPts.insert(RIP))
- NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP)) {
+ // If we overflow when we compute the path count, don't
+ // remove/move anything.
+ const BBState &RIPBBState = BBStates[RIP->getParent()];
+ if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
+ NewDelta -= PathCount;
+ }
}
NewReleases.push_back(NewRetainRelease);
}
@@ -2489,6 +2607,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
+ CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewReleaseRRI.Calls.begin(),
LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2500,8 +2619,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
const RRInfo &NewReleaseRetainRRI = Jt->second;
assert(NewReleaseRetainRRI.Calls.count(NewRelease));
if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
- unsigned PathCount =
- BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+
+ // If we overflow when we compute the path count, don't remove/move
+ // anything.
+ const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
+ unsigned PathCount;
+ if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
OldDelta += PathCount;
OldCount += PathCount;
@@ -2513,7 +2637,11 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
RI != RE; ++RI) {
Instruction *RIP = *RI;
if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
- PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ // If we overflow when we compute the path count, don't
+ // remove/move anything.
+ const BBState &RIPBBState = BBStates[RIP->getParent()];
+ if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
+ return false;
NewDelta += PathCount;
NewCount += PathCount;
}
@@ -2526,9 +2654,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented or nested, we can safely delete the
- // pair regardless of what's between them.
- if (KnownSafeTD || KnownSafeBU) {
+  // If the pointer is known incremented in one direction and we do not have
+  // MultipleOwners, we can safely remove the retain/release pairs. Otherwise
+  // we need to be known safe in both directions.
+ bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
+ ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
@@ -2539,6 +2670,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// less aggressive solution which is.
if (NewDelta != 0)
return false;
+
+ // At this point, we are not going to remove any RR pairs, but we still are
+ // able to move RR pairs. If one of our pointers is afflicted with
+ // CFGHazards, we cannot perform such code motion so exit early.
+ const bool WillPerformCodeMotion = RetainsToMove.ReverseInsertPts.size() ||
+ ReleasesToMove.ReverseInsertPts.size();
+ if (CFGHazardAfflicted && WillPerformCodeMotion)
+ return false;
}
// Determine whether the original call points are balanced in the retain and
@@ -2802,23 +2941,29 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
/// Identify program paths which execute sequences of retains and releases which
/// can be eliminated.
bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
+ // Releases, Retains - These are used to store the results of the main flow
+ // analysis. These use Value* as the key instead of Instruction* so that the
+ // map stays valid when we get around to rewriting code and calls get
+ // replaced by arguments.
DenseMap<Value *, RRInfo> Releases;
MapVector<Value *, RRInfo> Retains;
- /// This is used during the traversal of the function to track the
- /// states for each identified object at each block.
+ // This is used during the traversal of the function to track the
+ // states for each identified object at each block.
DenseMap<const BasicBlock *, BBState> BBStates;
// Analyze the CFG of the function, and all instructions.
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
// Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
+ bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
+ Releases,
+ F.getParent());
+
+ // Cleanup.
+ MultiOwnersSet.clear();
+
+ return AnyPairsCompletelyEliminated && NestingDetected;
}
/// Check if there is a dependent call earlier that does not have anything in
@@ -3051,6 +3196,12 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
PA.setAA(&getAnalysis<AliasAnalysis>());
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, false);
+ }
+#endif
+
// This pass performs several distinct transformations. As a compile-time aid
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 615c517..f0d29c8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 129af8d..996996d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -45,6 +45,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -498,6 +499,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
class GVN : public FunctionPass {
bool NoLoads;
@@ -519,6 +589,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +674,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+  // Helper functions for redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -612,6 +693,7 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
@@ -1159,114 +1241,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1297,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
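As an aside (editor's illustration, not part of the patch): a minimal C++ sketch of the simple-value coercion case MaterializeAdjustedValue handles; the function name is invented and, as the assert above requires, DataLayout must be available.

#include <cstdint>
#include <cstring>

// The 64-bit store makes a value available; the narrower 4-byte read of the
// same bytes at offset 0 can then be satisfied by coercing (truncating) the
// stored value via GetStoreValueForLoad instead of issuing a new load.
int32_t low_half(int64_t *p) {
  *p = -1;                       // wide store, value becomes available
  int32_t v;
  std::memcpy(&v, p, sizeof v);  // narrow read of the same location
  return v;                      // GVN can forward the coerced value
}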
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1465,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1487,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1555,7 +1515,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1578,20 +1538,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
}
- unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
- NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ CriticalEdgePred.push_back(Pred);
}
}
- if (!NeedToSplit.empty()) {
- toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
- return false;
- }
-
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
- "Fully available value should be eliminated above!");
+ "Fully available value should already be eliminated!");
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
@@ -1600,6 +1554,17 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (NumUnavailablePreds != 1)
return false;
+ // Split critical edges, and update the unavailable predecessors accordingly.
+ for (SmallVector<BasicBlock *, 4>::iterator I = CriticalEdgePred.begin(),
+ E = CriticalEdgePred.end(); I != E; I++) {
+ BasicBlock *OrigPred = *I;
+ BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
+ PredLoads.erase(OrigPred);
+ PredLoads[NewPred] = 0;
+ DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
+ << LoadBB->getName() << '\n');
+ }
+
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
@@ -1615,13 +1580,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1592,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1659,7 +1601,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- return false;
+ // HINT: Don't revert the edge-splitting, as the following transformation may
+ // also need to split these critical edges.
+ return !CriticalEdgePred.empty();
}
// Okay, we can eliminate this load by inserting a reload in the predecessor
@@ -1714,6 +1658,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate full redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
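As a rough illustration (not part of the patch) of the partial redundancy that Step 4 hands off to PerformLoadPRE; the function name is invented.

// The load of *p is available only along the 'if' edge. Load PRE inserts a
// load of *p into the other predecessor (splitting a critical edge first if
// necessary), after which the load at the join is fully redundant and is
// replaced by a PHI of the two available values.
int sum_if(int *p, bool c) {
  int v = 0;
  if (c)
    v = *p;        // *p available on this path
  return v + *p;   // partially redundant load, removable by load PRE
}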
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
@@ -2296,8 +2306,6 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
- if (splitCriticalEdges())
- ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
@@ -2309,6 +2317,7 @@ bool GVN::runOnFunction(Function& F) {
Changed |= PREChanged;
}
}
+
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
@@ -2542,6 +2551,15 @@ bool GVN::performPRE(Function &F) {
return Changed;
}
+/// Split the critical edge connecting the given two blocks, and return
+/// the block inserted on the critical edge.
+BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+ BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ return BB;
+}
+
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
@@ -2568,9 +2586,18 @@ bool GVN::iterateOnFunction(Function &F) {
RE = RPOT.end(); RI != RE; ++RI)
Changed |= processBlock(*RI);
#else
+ // Save the blocks this function has before the transformation begins. GVN may
+ // split critical edges, and hence may invalidate the RPO/DT iterator.
+ //
+ std::vector<BasicBlock *> BBVect;
+ BBVect.reserve(256);
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- Changed |= processBlock(DI->getBlock());
+ BBVect.push_back(DI->getBlock());
+
+ for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
+ I != E; I++)
+ Changed |= processBlock(*I);
#endif
return Changed;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 8e76c78..df11e92 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L))
+ if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
continue;
// Computing the value outside of the loop brings no benefit if :
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
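A schematic example (editor's sketch, not from the patch) of the interaction described in the rotateLoop comment; whether the latch is actually folded depends on how the front end laid out the blocks and on shouldSpeculateInstrs.

// Before simplification, the block that branches back to the header (the
// latch) holds only cheap, speculatable instructions. simplifyLoopLatch()
// folds it into the exiting block; rotateLoop() is then still allowed to
// rotate once, because SimplifiedLatch is true for that first attempt.
void bump(int *a, int n) {
  int i = 0;
  while (i < n) {   // exit test lives in the header until rotation
    a[i] += 1;
    ++i;            // increment plus back-edge form the latch
  }
}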
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73e44d7..b107fef 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -774,6 +774,16 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
}
namespace {
+class LSRUse;
+}
+// Check if it is legal to fold 2 base registers.
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F);
+// Get the cost of the scaling factor used in F for LU.
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
+
+namespace {
/// Cost - This class is used to measure and compare candidate formulae.
class Cost {
@@ -785,11 +795,12 @@ class Cost {
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
+ unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0) {}
+ SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
@@ -799,9 +810,9 @@ public:
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
- | ImmCost | SetupCost) != ~0u)
+ | ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
- & ImmCost & SetupCost) == ~0u);
+ & ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
@@ -810,12 +821,14 @@ public:
return NumRegs == ~0u;
}
- void RateFormula(const Formula &F,
+ void RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
@@ -900,12 +913,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
}
-void Cost::RateFormula(const Formula &F,
+void Cost::RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
@@ -932,7 +947,12 @@ void Cost::RateFormula(const Formula &F,
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
if (NumBaseParts > 1)
- NumBaseAdds += NumBaseParts - 1;
+ // Do not count the base and a possible second register if the target
+ // can fold 2 registers.
+ NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+
+ // Accumulate non-free scaling amounts.
+ ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
@@ -955,6 +975,7 @@ void Cost::Loose() {
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
+ ScaleCost = ~0u;
}
/// operator< - Choose the lower cost.
@@ -967,6 +988,8 @@ bool Cost::operator<(const Cost &Other) const {
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
+ if (ScaleCost != Other.ScaleCost)
+ return ScaleCost < Other.ScaleCost;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
@@ -983,6 +1006,8 @@ void Cost::print(raw_ostream &OS) const {
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
+ if (ScaleCost != 0)
+ OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
@@ -1359,6 +1384,58 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F) {
+ // If F is used as an Addressing Mode, it may fold one Base plus one
+ // scaled register. If the scaled register is nil, treat another
+ // element of the base regs as a 1-scaled register.
+ // This is possible if BaseRegs has at least 2 registers.
+
+ // If this is not an address calculation, this is not an addressing mode
+ // use.
+ if (LU.Kind != LSRUse::Address)
+ return false;
+
+ // F is already scaled.
+ if (F.Scale != 0)
+ return false;
+
+ // We need to keep one register for the base and one to scale.
+ if (F.BaseRegs.size() < 2)
+ return false;
+
+ return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
+}
+
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ if (!F.Scale)
+ return 0;
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F) && "Illegal formula in use.");
+
+ switch (LU.Kind) {
+ case LSRUse::Address: {
+ int CurScaleCost = TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+ assert(CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!");
+ return CurScaleCost;
+ }
+ case LSRUse::ICmpZero:
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
+ // Therefore, return 0 in case F.Scale == -1.
+ return F.Scale != -1;
+
+ case LSRUse::Basic:
+ case LSRUse::Special:
+ return 0;
+ }
+
+ llvm_unreachable("Invalid LSRUse Kind!");
+}
+
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
@@ -3607,7 +3684,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.BaseOffset))
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -3690,7 +3767,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
@@ -3726,7 +3803,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Cost CostBest;
Regs.clear();
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
+ DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
@@ -4079,7 +4157,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
+ LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be0f0e8..c325925 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -626,8 +626,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ if (!StructTy->isSized()) {
+ // The call may never return and hence the copy-instruction may never
+ // be executed, and therefore it's not safe to say "the destination
+ // has at least <cpyLen> bytes", as implied by the copy-instruction.
+ return false;
+ }
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index be8d39e..d105f5e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
RemapInstruction(II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper);
+ TypeMapper, Materializer);
}
/// CloneFunction - Return a copy of the specified function, but without
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 37819cc..6d5f16c 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -59,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
@@ -100,16 +101,16 @@ namespace {
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
- void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
};
}
+static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L);
+
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", true, false)
@@ -208,7 +209,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L);
+ Preheader = InsertPreheaderForLoop(L, this);
if (Preheader) {
++NumInserted;
Changed = true;
@@ -367,7 +368,7 @@ ReprocessLoop:
/// preheader, this method is called to insert one. This method has two phases:
/// preheader insertion and analysis updating.
///
-BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *PreheaderBB;
if (!Header->isLandingPad()) {
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- this);
+ PP);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", this, NewBBs);
+ ".split-lp", PP, NewBBs);
PreheaderBB = NewBBs[0];
}
@@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
// right after some 'outside block' block. This prevents the preheader from
// being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
+void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock*> &SplitPreds,
+ Loop *L) {
// Check to see if NewBB is already well placed.
Function::iterator BBI = NewBB; --BBI;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 052ad85..6d12f7a 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -281,7 +281,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
return BI->getCondition();
}
-/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
/// given instruction, which is assumed to be safe to speculate. 1 means
/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
static unsigned ComputeSpeculationCost(const User *I) {
@@ -533,9 +533,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
- ICI->getPredicate() == ICmpInst::ICMP_NE) &&
- GetConstantInt(ICI->getOperand(1), TD))
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
@@ -1083,9 +1081,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
(isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
- // If we get here, we can hoist at least one instruction.
BasicBlock *BIParent = BI->getParent();
+ bool Changed = false;
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1100,6 +1098,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
I2->eraseFromParent();
+ Changed = true;
I1 = BB1_Itr++;
I2 = BB2_Itr++;
@@ -1119,7 +1118,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
HoistTerminator:
// It may not be possible to hoist an invoke.
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
- return true;
+ return Changed;
+
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V == BB2V)
+ continue;
+
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+ return Changed;
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+ return Changed;
+ }
+ }
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
@@ -1362,8 +1377,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
///
/// \return The pointer to the value of the previous store if the store can be
/// hoisted into the predecessor block. 0 otherwise.
-Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
- BasicBlock *StoreBB, BasicBlock *EndBB) {
+static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
if (!StoreToHoist)
return 0;
@@ -1522,18 +1537,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
Value *OrigV = PN->getIncomingValueForBlock(BB);
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+ // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
// Skip PHIs which are trivial.
if (ThenV == OrigV)
continue;
HaveRewritablePHIs = true;
- ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
- if (!CE)
+ ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
+ ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
+ if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if (!isSafeToSpeculativelyExecute(CE))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+ if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
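For context (not part of the patch), a small example of the case the new OrigCost/ThenCost check covers; at the IR level both incoming values of the pointer PHI are ConstantExpr GEPs.

extern int A[16], B[16];

// Speculating the 'then' block turns the PHI of &A[4] and &B[4] into a
// select, so the speculation cost of *both* constant expressions is paid,
// which is why the check sums OrigCost and ThenCost against a doubled
// threshold instead of looking at the 'then' value alone.
int *pick(bool c) {
  int *p = &A[4];   // constant GEP of @A feeding the PHI
  if (c)
    p = &B[4];      // constant GEP of @B feeding the PHI
  return p;
}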
@@ -3643,7 +3663,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
-/// for this switch, based on the number of caes, size of the table and the
+/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..457fc80 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -22,14 +22,22 @@ using namespace llvm;
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
+void ValueMaterializer::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper) {
+ ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
ValueToValueMapTy::iterator I = VM.find(V);
// If the value already exists in the map, use it.
if (I != VM.end() && I->second) return I->second;
+ // If we have a materializer and it can materialize a value, use that.
+ if (Materializer) {
+ if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
+ return VM[V] = NewV;
+ }
+
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
if (isa<GlobalValue>(V) || isa<MDString>(V))
@@ -57,14 +65,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
if (OP == 0) continue;
- Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_OP == OP ||
@@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (Op == 0)
Elts.push_back(0);
else {
- Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
@@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Function *F =
- cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer));
BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
@@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Value *Mapped = 0;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
- Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
if (Mapped != C) break;
}
@@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Map the rest of the operands that aren't processed yet.
for (++OpNo; OpNo != NumOperands; ++OpNo)
Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
- Flags, TypeMapper));
+ Flags, TypeMapper, Materializer));
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer){
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags, TypeMapper);
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
// If we aren't ignoring missing entries, assert that something happened.
if (V != 0)
*op = V;
@@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
MDNode *Old = MI->second;
- MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
I->setMetadata(MI->first, New);
}
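A hypothetical client of the new ValueMaterializer hook (editor's sketch; the class and variable names are made up, and DebugIR in this same commit is the actual in-tree user):

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Lazily creates a declaration in the destination module the first time a
// function is referenced, instead of pre-seeding the value map.
class LazyDeclMaterializer : public ValueMaterializer {
  Module &Dst;
public:
  explicit LazyDeclMaterializer(Module &M) : Dst(M) {}
  virtual Value *materializeValueFor(Value *V) {
    if (Function *F = dyn_cast<Function>(V))
      return Function::Create(F->getFunctionType(), F->getLinkage(),
                              F->getName(), &Dst);
    return 0; // not handled here; MapValue falls back to its normal rules
  }
};
// It is threaded through the remapping entry points extended above, e.g.
//   RemapInstruction(I, VMap, RF_None, /*TypeMapper=*/0, &Materializer);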
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9a832f7..3693f4a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to assess the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
@@ -80,6 +80,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -118,11 +119,11 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
/// than this number of comparisons.
static const unsigned RuntimeMemoryCheckThreshold = 8;
-/// We use a metadata with this name to indicate that a scalar loop was
-/// vectorized and that we don't need to re-vectorize it if we run into it
-/// again.
-static const char*
-AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
+/// Maximum SIMD width.
+static const unsigned MaxVectorWidth = 64;
+
+/// Maximum vectorization unroll count.
+static const unsigned MaxUnrollFactor = 16;
namespace {
@@ -216,7 +217,7 @@ private:
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
- Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
+ Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -312,10 +313,85 @@ private:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
+ /// Holds the extended (to the widest induction type) start index.
+ Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
};
+/// \brief Check if conditionally executed loads are hoistable.
+///
+/// This class has two functions: isHoistableLoad and canHoistAllLoads.
+/// isHoistableLoad should be called on all load instructions that are executed
+/// conditionally. After all conditional loads are processed, the client should
+/// call canHoistAllLoads to determine if all of the conditional executed loads
+/// have an unconditional memory access to the same memory address in the loop.
+class LoadHoisting {
+ typedef SmallPtrSet<Value *, 8> MemorySet;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ MemorySet CondLoadAddrSet;
+
+public:
+ LoadHoisting(Loop *L, DominatorTree *D) : TheLoop(L), DT(D) {}
+
+ /// \brief Check if the instruction is a load with an identifiable address.
+ bool isHoistableLoad(Instruction *L);
+
+ /// \brief Check if all of the conditional loads are hoistable because there
+ /// exists an unconditional memory access to the same address in the loop.
+ bool canHoistAllLoads();
+};
+
+bool LoadHoisting::isHoistableLoad(Instruction *L) {
+ LoadInst *LI = dyn_cast<LoadInst>(L);
+ if (!LI)
+ return false;
+
+ CondLoadAddrSet.insert(LI->getPointerOperand());
+ return true;
+}
+
+static void addMemAccesses(BasicBlock *BB, SmallPtrSet<Value *, 8> &Set) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(BI)) // Try a load.
+ Set.insert(LI->getPointerOperand());
+ else if (StoreInst *SI = dyn_cast<StoreInst>(BI)) // Try a store.
+ Set.insert(SI->getPointerOperand());
+ }
+}
+
+bool LoadHoisting::canHoistAllLoads() {
+ // No conditional loads.
+ if (CondLoadAddrSet.empty())
+ return true;
+
+ MemorySet UncondMemAccesses;
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+
+ // Iterate over the unconditional blocks and collect memory access addresses.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // Ignore conditional blocks.
+ if (BB != LoopLatch && !DT->dominates(BB, LoopLatch))
+ continue;
+
+ addMemAccesses(BB, UncondMemAccesses);
+ }
+
+ // And make sure there is a matching unconditional access for every
+ // conditional load.
+ for (MemorySet::iterator MI = CondLoadAddrSet.begin(),
+ ME = CondLoadAddrSet.end(); MI != ME; ++MI)
+ if (!UncondMemAccesses.count(*MI))
+ return false;
+
+ return true;
+}
+
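A source-level sketch (not from the patch; names invented) of the pattern LoadHoisting accepts:

// The conditional read of a[i] has a matching unconditional access to the
// same address earlier in the iteration, so isHoistableLoad() followed by
// canHoistAllLoads() succeeds. A loop whose only access to a[i] sat under
// the 'if' would be rejected.
void relax(int *a, const int *b, int n) {
  for (int i = 0; i < n; ++i) {
    int x = a[i];        // unconditional access to a[i]
    if (b[i] > 0)
      x += a[i];         // conditionally executed load of the same address
    a[i] = x;
  }
}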
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -335,7 +411,8 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ LoadSpeculation(L, DT) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -347,7 +424,8 @@ public:
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult ///< Product of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
/// This enum represents the kinds of inductions that we support.
@@ -365,7 +443,9 @@ public:
MRK_UIntMin,
MRK_UIntMax,
MRK_SIntMin,
- MRK_SIntMax
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
};
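A small example (editor's illustration, not part of the patch) of the loop shape behind the new RK_FloatMinMax / MRK_FloatMin kinds:

// A min implemented as select(fcmp) over floats. Treating it as a reduction
// is only sound when NaNs can be ruled out (e.g. under the no-nans-fp-math
// function attribute), which is what HasFunNoNaNAttr tracks further below.
float min_reduce(const float *a, int n) {
  float m = a[0];
  for (int i = 1; i < n; ++i)
    m = a[i] < m ? a[i] : m;   // select(cmp) min, vectorizable as FMinMax
  return m;
}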
/// This POD struct holds information about reduction variables.
@@ -379,7 +459,7 @@ public:
// The starting value of the reduction.
// It does not have to be zero!
- Value *StartValue;
+ TrackingVH<Value> StartValue;
// The instruction who's value is used outside the loop.
Instruction *LoopExitInstr;
// The kind of the reduction.
@@ -424,7 +504,7 @@ public:
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
- SmallVector<Value*, 2> Pointers;
+ SmallVector<TrackingVH<Value>, 2> Pointers;
/// Holds the pointer value at the beginning of the loop.
SmallVector<const SCEV*, 2> Starts;
/// Holds the pointer value at the end of the loop.
@@ -438,7 +518,7 @@ public:
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
/// Start value.
- Value *StartValue;
+ TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
};
@@ -470,6 +550,9 @@ public:
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Returns the widest induction type.
+ Type *getWidestInductionType() { return WidestIndTy; }
+
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
@@ -498,8 +581,7 @@ public:
/// This function returns the identity element (or neutral element) for
/// the operation K.
- static Constant *getReductionIdentity(ReductionKind K, Type *Tp,
- MinMaxReductionKind MinMaxK);
+ static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -577,6 +659,8 @@ private:
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
+ /// Holds the widest induction type encountered.
+ Type *WidestIndTy;
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
@@ -587,6 +671,11 @@ private:
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr;
+
+ /// Utility to determine whether loads can be speculated.
+ LoadHoisting LoadSpeculation;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -679,6 +768,126 @@ private:
const TargetLibraryInfo *TLI;
};
+/// Utility class for getting and setting loop vectorizer hints in the form
+/// of loop metadata.
+struct LoopVectorizeHints {
+ /// Vectorization width.
+ unsigned Width;
+ /// Vectorization unroll factor.
+ unsigned Unroll;
+
+ LoopVectorizeHints(const Loop *L)
+ : Width(VectorizationFactor)
+ , Unroll(VectorizationUnroll)
+ , LoopID(L->getLoopID()) {
+ getHints(L);
+ // The command line options override any loop metadata except when
+ // width == 1, which is used to indicate the loop is already vectorized.
+ if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1)
+ Width = VectorizationFactor;
+ if (VectorizationUnroll.getNumOccurrences() > 0)
+ Unroll = VectorizationUnroll;
+ }
+
+ /// Return the loop vectorizer metadata prefix.
+ static StringRef Prefix() { return "llvm.vectorizer."; }
+
+ MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) {
+ SmallVector<Value*, 2> Vals;
+ Vals.push_back(MDString::get(Context, Name));
+ Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
+ return MDNode::get(Context, Vals);
+ }
+
+ /// Mark the loop L as already vectorized by setting the width to 1.
+ void setAlreadyVectorized(Loop *L) {
+ LLVMContext &Context = L->getHeader()->getContext();
+
+ Width = 1;
+
+ // Create a new loop id with one more operand for the already_vectorized
+ // hint. If the loop already has a loop id then copy the existing operands.
+ SmallVector<Value*, 4> Vals(1);
+ if (LoopID)
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
+ Vals.push_back(LoopID->getOperand(i));
+
+ Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
+
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ L->setLoopID(NewLoopID);
+ if (LoopID)
+ LoopID->replaceAllUsesWith(NewLoopID);
+
+ LoopID = NewLoopID;
+ }
+
+private:
+ MDNode *LoopID;
+
+ /// Find hints specified in the loop metadata.
+ void getHints(const Loop *L) {
+ if (!LoopID)
+ return;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ const MDString *S = 0;
+ SmallVector<Value*, 4> Args;
+
+ // The expected hint is either an MDString or an MDNode with the first
+ // operand an MDString.
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ S = dyn_cast<MDString>(MD->getOperand(0));
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
+ Args.push_back(MD->getOperand(i));
+ } else {
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
+ assert(Args.size() == 0 && "too many arguments for MDString");
+ }
+
+ if (!S)
+ continue;
+
+ // Check if the hint starts with the vectorizer prefix.
+ StringRef Hint = S->getString();
+ if (!Hint.startswith(Prefix()))
+ continue;
+ // Remove the prefix.
+ Hint = Hint.substr(Prefix().size(), StringRef::npos);
+
+ if (Args.size() == 1)
+ getHint(Hint, Args[0]);
+ }
+ }
+
+ // Check string hint with one operand.
+ void getHint(StringRef Hint, Value *Arg) {
+ const ConstantInt *C = dyn_cast<ConstantInt>(Arg);
+ if (!C) return;
+ unsigned Val = C->getZExtValue();
+
+ if (Hint == "width") {
+ assert(isPowerOf2_32(Val) && Val <= MaxVectorWidth &&
+ "Invalid width metadata");
+ Width = Val;
+ } else if (Hint == "unroll") {
+ assert(isPowerOf2_32(Val) && Val <= MaxUnrollFactor &&
+ "Invalid unroll metadata");
+ Unroll = Val;
+ } else
+ DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint);
+ }
+};
+
/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
@@ -717,6 +926,13 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
+ LoopVectorizeHints Hints(L);
+
+ if (Hints.Width == 1) {
+ DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ return false;
+ }
+
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
if (!LVL.canVectorize()) {
@@ -744,10 +960,10 @@ struct LoopVectorize : public LoopPass {
// Select the optimal vectorization factor.
LoopVectorizationCostModel::VectorizationFactor VF;
- VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+ VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
// Select the unroll factor.
- unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll,
- VF.Width, VF.Cost);
+ unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
+ VF.Cost);
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
@@ -762,6 +978,9 @@ struct LoopVectorize : public LoopPass {
InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized(L);
+
DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
}
@@ -794,7 +1013,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
- const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
@@ -825,7 +1044,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
@@ -838,8 +1057,8 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) {
- int Idx = Negate ? (-i): i;
- Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+ int64_t Idx = Negate ? (-i) : i;
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
// Add the consecutive indices to the vector value.
@@ -1229,21 +1448,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
- // Mark the old scalar loop with metadata that tells us not to vectorize this
- // loop again if we run into it.
- MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
- OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
-
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
OldInduction = Legal->getInduction();
- Type *IdxTy = OldInduction ? OldInduction->getType() :
- DL->getIntPtrType(SE->getContext());
+ Type *IdxTy = Legal->getWidestInductionType();
// Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// Get the total trip count from the count by adding 1.
@@ -1261,9 +1474,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
- Value *StartIdx = OldInduction ?
- OldInduction->getIncomingValueForBlock(BypassBlock):
- ConstantInt::get(IdxTy, 0);
+ Builder.SetInsertPoint(BypassBlock->getTerminator());
+ Value *StartIdx = ExtendedIdx = OldInduction ?
+ Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
+ IdxTy):
+ ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
LoopBypassBlocks.push_back(BypassBlock);
@@ -1357,76 +1572,101 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
PHINode *ResumeIndex = 0;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ // Set builder to point to last bypass block.
+ BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
LoopVectorizationLegality::InductionInfo II = I->second;
- PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+
+ Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
+ PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
MiddleBlock->getTerminator());
+ // We might have extended the type of the induction variable but we need a
+ // truncated version for the scalar loop.
+ PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
+ PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
+ MiddleBlock->getTerminator()) : 0;
+
Value *EndValue = 0;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- // Handle the integer induction counter:
+ // Handle the integer induction counter.
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
- assert(OrigPhi == OldInduction && "Unknown integer PHI");
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
+
+ // We have the canonical induction variable.
+ if (OrigPhi == OldInduction) {
+        // Create a truncated version of the resume value for the scalar loop;
+        // we might have promoted the type to a larger width.
+ EndValue =
+ BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ TruncResumeVal->addIncoming(EndValue, VecBody);
+
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
+ }
+
+ // Not the canonical induction variable - add the vector loop count to the
+ // start value.
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+      EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue, "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
- unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
- unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
- Value *CRD = CountRoundDown;
- if (CRDSize > IISize)
- CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
- II.StartValue->getType(), "tr.crd",
- LoopBypassBlocks.back()->getTerminator());
- else if (CRDSize < IISize)
- CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
- II.StartValue->getType(),
- "sext.crd",
- LoopBypassBlocks.back()->getTerminator());
- // Handle reverse integer induction counter:
- EndValue =
- BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StartValue->getType(),
+ "cast.crd");
+ // Handle reverse integer induction counter.
+ EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
- EndValue =
- GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
+ "ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
- Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
- "rev.ind.end",
- LoopBypassBlocks.back()->getTerminator());
- EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
- "rev.ptr.ind.end",
- LoopBypassBlocks.back()->getTerminator());
+ Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
+ "rev.ind.end");
+ EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
+ "rev.ptr.ind.end");
break;
}
}// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ if (OrigPhi == OldInduction)
+ ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ else
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ }
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+    // The old induction's phi node in the scalar body needs the truncated value.
+ if (OrigPhi == OldInduction)
+ OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
+ else
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
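All of the end values built above follow one pattern: advance the induction by the number of iterations executed by the vector body. A rough scalar model of the four cases (the enum and helper below are illustrative, not the vectorizer's types; ElemSize stands in for the pointee size a GEP would scale by):

#include <cstdint>

enum class IndKind { Int, ReverseInt, Ptr, ReversePtr };

static int64_t inductionEndValue(IndKind K, int64_t Start,
                                 int64_t CountRoundDown, int64_t ElemSize) {
  switch (K) {
  case IndKind::Int:        return Start + CountRoundDown;             // "ind.end"
  case IndKind::ReverseInt: return Start - CountRoundDown;             // "rev.ind.end"
  case IndKind::Ptr:        return Start + CountRoundDown * ElemSize;  // "ptr.ind.end"
  case IndKind::ReversePtr: return Start - CountRoundDown * ElemSize;  // "rev.ptr.ind.end"
  }
  return Start;
}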
// If we are generating a new induction variable then we also need to
@@ -1501,8 +1741,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/// This function returns the identity element (or neutral element) for
/// the operation K.
Constant*
-LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp,
- MinMaxReductionKind MinMaxK) {
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
case RK_IntegerXor:
case RK_IntegerAdd:
@@ -1521,24 +1760,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp,
case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
- case RK_IntegerMinMax:
- switch(MinMaxK) {
- default: llvm_unreachable("Unknown min/max predicate");
- case MRK_UIntMin:
- return ConstantInt::getAllOnesValue(Tp);
- case MRK_UIntMax:
- return ConstantInt::get(Tp, 0);
- case MRK_SIntMin: {
- unsigned BitWidth = Tp->getPrimitiveSizeInBits();
- return ConstantInt::get(Tp->getContext(),
- APInt::getSignedMaxValue(BitWidth));
- }
- case LoopVectorizationLegality::MRK_SIntMax: {
- unsigned BitWidth = Tp->getPrimitiveSizeInBits();
- return ConstantInt::get(Tp->getContext(),
- APInt::getSignedMinValue(BitWidth));
- }
- }
default:
llvm_unreachable("Unknown reduction kind");
}
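What remains in getReductionIdentity are the true neutral elements; min/max reductions no longer appear here because their only safe seed is the reduction's own start value, handled separately below. A scalar sketch of the property being relied on (the enum and helper are illustrative):

#include <cstdint>

enum class RdxKind { Add, Mul, Or, And, Xor };

// x op identityOf(op) == x for every x; the FP add/mul cases use 0.0 and 1.0.
static int64_t identityOf(RdxKind K) {
  switch (K) {
  case RdxKind::Add:
  case RdxKind::Or:
  case RdxKind::Xor: return 0;
  case RdxKind::Mul: return 1;
  case RdxKind::And: return -1;   // the all-ones bit pattern
  }
  return 0;
}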
@@ -1668,6 +1889,8 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
return Instruction::FAdd;
case LoopVectorizationLegality::RK_IntegerMinMax:
return Instruction::ICmp;
+ case LoopVectorizationLegality::RK_FloatMinMax:
+ return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
@@ -1692,8 +1915,21 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
break;
case LoopVectorizationLegality::MRK_SIntMax:
P = CmpInst::ICMP_SGT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
}
- Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Cmp;
+  if (RK == LoopVectorizationLegality::MRK_FloatMin ||
+      RK == LoopVectorizationLegality::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
}
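For reference, the scalar meaning of the compare/select pairs emitted above, including the new float predicates (plain C++, not IRBuilder calls). The ordered predicates evaluate to false when either input is NaN, which is why float min/max reductions are only formed when the no-NaNs attribute is present.

static int sMax(int A, int B)                { return A > B ? A : B; } // ICMP_SGT + select
static unsigned uMin(unsigned A, unsigned B) { return A < B ? A : B; } // ICMP_ULT + select
static float fMin(float A, float B)          { return A < B ? A : B; } // FCMP_OLT + select
static float fMax(float A, float B)          { return A > B ? A : B; } // FCMP_OGT + select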
@@ -1761,16 +1997,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
- Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType(),
- RdxDesc.MinMaxKind);
- Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ Value *Identity;
+ Value *VectorStart;
+ if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+ RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+      // MinMax reductions have the start value as their identity.
+ VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+ "minmax.ident");
+ } else {
+ Constant *Iden =
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
// Fix the vector-loop phi.
// We created the induction variable so we know that the
@@ -1784,7 +2028,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
VectorParts &Val = getVectorValue(LoopVal);
for (unsigned part = 0; part < UF; ++part) {
- // Make sure to add the reduction stat value only to the
+      // Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
@@ -1814,7 +2058,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
- if (Op != Instruction::ICmp)
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
RdxParts[part], ReducedPartRdx,
"bin.rdx");
@@ -1845,7 +2089,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- if (Op != Instruction::ICmp)
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
"bin.rdx");
else
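The shuffle loop above is the usual log2(VF) halving reduction: each round combines the upper half of the vector into the lower half until a single lane remains. A scalar stand-in for what the "rdx.shuf" plus "bin.rdx" (or min/max select) sequence computes, assuming a power-of-two element count:

#include <cstddef>
#include <functional>
#include <vector>

static int reduceByHalving(std::vector<int> V,
                           const std::function<int(int, int)> &Op) {
  for (std::size_t Half = V.size() / 2; Half >= 1; Half /= 2)
    for (std::size_t i = 0; i != Half; ++i)
      V[i] = Op(V[i], V[i + Half]);   // fold the upper half into the lower
  return V[0];
}

// reduceByHalving({1, 2, 3, 4}, [](int A, int B) { return A + B; }) == 10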
@@ -1982,18 +2226,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
-
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
- P->getParent());
- for (unsigned part = 0; part < UF; ++part) {
- VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
- VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
- "predphi");
+ unsigned NumIncoming = P->getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ if (In == 0)
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ In0[part]);
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
+ }
}
continue;
}
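Because exactly one incoming edge of such a PHI is taken on any given iteration, folding it into a chain of selects keyed on the edge masks preserves its value, and the identity select for operand 0 keeps the chain well formed even for single-edge PHIs. A plain C++ model of the chain (names illustrative):

#include <cstddef>
#include <vector>

struct IncomingEdge { bool Mask; int Value; };

static int flattenPredicatedPhi(const std::vector<IncomingEdge> &Ins) {
  int Result = Ins[0].Value;                        // identity select for operand 0
  for (std::size_t In = 1; In != Ins.size(); ++In)  // the "predphi" selects
    Result = Ins[In].Mask ? Ins[In].Value : Result;
  return Result;
}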
@@ -2010,10 +2269,25 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
- assert(P == OldInduction && "Unexpected PHI");
- Value *Broadcasted = getBroadcastInstrs(Induction);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding 0, 1, 2 ...
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
continue;
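The arithmetic above rebuilds every secondary integer induction from the canonical one: normalize the canonical counter so it starts at zero, then re-apply the secondary induction's own start value. In scalar form (a hypothetical helper, not the IRBuilder sequence):

#include <cstdint>

static int64_t secondaryInduction(int64_t Induction, int64_t ExtendedIdx,
                                  int64_t StartValue) {
  int64_t NormalizedIdx = Induction - ExtendedIdx;  // "normalized.idx"
  return StartValue + NormalizedIdx;                // "offset.idx"
}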
@@ -2022,16 +2296,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
- Value *StartIdx = 0;
- // If we have a single integer induction variable then use it.
- // Otherwise, start counting at zero.
- if (OldInduction) {
- LoopVectorizationLegality::InductionInfo OldII =
- Legal->getInductionVars()->lookup(OldInduction);
- StartIdx = OldII.StartValue;
- } else {
- StartIdx = ConstantInt::get(Induction->getType(), 0);
- }
+ Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");
@@ -2049,7 +2314,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
continue;
}
@@ -2273,23 +2539,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator()))
return false;
- // We must have at most two predecessors because we need to convert
- // all PHIs to selects.
- unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
- if (Preds > 2)
- return false;
-
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
return false;
}
+ // Check that we can actually speculate the hoistable loads.
+ if (!LoadSpeculation.canHoistAllLoads())
+ return false;
+
// We can if-convert this loop.
return true;
}
bool LoopVectorizationLegality::canVectorize() {
- assert(TheLoop->getLoopPreheader() && "No preheader!!");
+ // We must have a loop in canonical form. Loops with indirectbr in them cannot
+ // be canonicalized.
+ if (!TheLoop->getLoopPreheader())
+ return false;
// We can only vectorize innermost loops.
if (TheLoop->getSubLoopsVector().size())
@@ -2317,7 +2584,7 @@ bool LoopVectorizationLegality::canVectorize() {
TheLoop->getHeader()->getName() << "\n");
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
@@ -2356,16 +2623,50 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+ if (Ty->isPointerTy())
+ return DL.getIntPtrType(Ty->getContext());
+ return Ty;
+}
+
+static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+ Ty0 = convertPointerToIntegerType(DL, Ty0);
+ Ty1 = convertPointerToIntegerType(DL, Ty1);
+ if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+ return Ty0;
+ return Ty1;
+}
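A stand-alone restatement of the type helpers above: a pointer-typed induction is measured at the target's pointer width (assumed to be 64 bits in this sketch, where the real code asks DataLayout), and the type with the larger scalar width becomes the widest induction type.

struct TyInfo { bool IsPointer; unsigned Bits; };

static TyInfo asIntegerType(TyInfo T, unsigned PtrBits = 64) {
  return T.IsPointer ? TyInfo{false, PtrBits} : T;
}

static TyInfo widerType(TyInfo A, TyInfo B, unsigned PtrBits = 64) {
  A = asIntegerType(A, PtrBits);
  B = asIntegerType(B, PtrBits);
  return A.Bits > B.Bits ? A : B;
}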
+
+/// \brief Check that the instruction has outside loop users and is not an
+/// identified reduction variable.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+ SmallPtrSet<Value *, 4> &Reductions) {
+ // Reduction instructions are allowed to have exit users. All other
+ // instructions must not have external users.
+ if (!Reductions.count(Inst))
+    // Check that all of the users of the instruction are inside the loop.
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return true;
+ }
+ }
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
- // If we marked the scalar loop as "already vectorized" then no need
- // to vectorize it again.
- if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
- DEBUG(dbgs() << "LV: This loop was vectorized before\n");
- return false;
- }
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr = F.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex,
+ "no-nans-fp-math").getValueAsString() == "true";
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -2376,16 +2677,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
+ Type *PhiTy = Phi->getType();
// Check that this PHI type is allowed.
- if (!Phi->getType()->isIntegerTy() &&
- !Phi->getType()->isFloatingPointTy() &&
- !Phi->getType()->isPointerTy()) {
+ if (!PhiTy->isIntegerTy() &&
+ !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -2393,8 +2689,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// If this PHINode is not in the header block, then we know that we
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
- if (*bb != Header)
- continue;
+ if (*bb != Header) {
+ // Check that this instruction has no outside users or is an
+ // identified reduction value with an outside user.
+        if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ continue;
+ return false;
+ }
+
+      // Only the if-converted PHIs handled above may have more than two
+      // incoming values; PHIs in the loop header must have exactly two.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
@@ -2402,13 +2709,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
InductionKind IK = isInductionVariable(Phi);
if (IK_NoInduction != IK) {
+ // Get the widest type.
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
- if (Induction) {
- DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
- return false;
- }
- Induction = Phi;
+ // Use the phi node with the widest type as induction. Use the last
+ // one if there are multiple (no good reason for doing this other
+ // than it is expedient).
+ if (!Induction || PhiTy == WidestIndTy)
+ Induction = Phi;
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
@@ -2448,6 +2761,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_FloatMinMax)) {
+        DEBUG(dbgs() << "LV: Found a float MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
@@ -2477,24 +2794,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (!AllowedExit.count(it))
- //Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator I = it->use_begin(), E = it->use_end();
- I != E; ++I) {
- Instruction *U = cast<Instruction>(*I);
- // This user may be a reduction exit value.
- if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
- return false;
- }
- }
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
} // next instr.
}
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- assert(getInductionVars()->size() && "No induction variables");
+ if (Inductions.empty())
+ return false;
}
return true;
@@ -2523,9 +2833,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
Uniforms.insert(I);
// Insert all operands.
- for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
- Worklist.push_back(I->getOperand(i));
- }
+ Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
}
}
@@ -2776,8 +3084,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
return false;
}
@@ -2820,8 +3128,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
return false;
}
}
@@ -2841,6 +3149,26 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
+static bool hasMultipleUsesOf(Instruction *I,
+ SmallPtrSet<Instruction *, 8> &Insts) {
+ unsigned NumUses = 0;
+  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
+ }
+
+ return false;
+}
+
+static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
+  for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
ReductionKind Kind) {
if (Phi->getNumIncomingValues() != 2)
@@ -2859,129 +3187,160 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// This includes users of the reduction, variables (which form a cycle
// which ends in the phi node).
Instruction *ExitInstruction = 0;
- // Indicates that we found a binary operation in our scan.
- bool FoundBinOp = false;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
- // Iter is our iterator. We start with the PHI node and scan for all of the
- // users of this instruction. All users must be instructions that can be
- // used as reduction variables (such as ADD). We may have a single
- // out-of-block user. The cycle must end with the original PHI.
- Instruction *Iter = Phi;
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
- // such that we don't stop when we see the phi has two uses (one by the select
- // and one by the icmp) and to make sure we only see exactly the two
- // instructions.
- unsigned NumICmpSelectPatternInst = 0;
+ // to make sure we only see exactly the two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
ReductionInstDesc ReduxDesc(false, 0);
- // Avoid cycles in the chain.
SmallPtrSet<Instruction *, 8> VisitedInsts;
- while (VisitedInsts.insert(Iter)) {
- // If the instruction has no users then this is a broken
- // chain and can't be a reduction variable.
- if (Iter->use_empty())
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By one instruction outside of the loop (safe).
+ // - By further instructions outside of the loop (not safe).
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
return false;
- // Did we find a user inside this loop already ?
- bool FoundInBlockUser = false;
- // Did we reach the initial PHI node already ?
- bool FoundStartPHI = false;
+ bool IsAPhi = isa<PHINode>(Cur);
- // Is this a bin op ?
- FoundBinOp |= !isa<PHINode>(Iter);
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
- // For each of the *users* of iter.
- for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
- it != e; ++it) {
- Instruction *U = cast<Instruction>(*it);
- // We already know that the PHI is a user.
- if (U == Phi) {
- FoundStartPHI = true;
- continue;
- }
+    // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds.
+ ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
+ isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi;
+
+ // Process users of current instruction. Push non PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (Value::use_iterator UI = Cur->use_begin(), E = Cur->use_end(); UI != E;
+ ++UI) {
+ Instruction *Usr = cast<Instruction>(*UI);
// Check if we found the exit user.
- BasicBlock *Parent = U->getParent();
+ BasicBlock *Parent = Usr->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users.
if (ExitInstruction != 0)
return false;
- ExitInstruction = Iter;
- }
-
- // We allow in-loop PHINodes which are not the original reduction PHI
- // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
- // structure) then don't skip this PHI.
- if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
- U->getParent() != TheLoop->getHeader() &&
- TheLoop->contains(U) &&
- Iter->hasNUsesOrMore(2))
+ ExitInstruction = Cur;
continue;
+ }
- // We can't have multiple inside users except for a combination of
- // icmp/select both using the phi.
- if (FoundInBlockUser && !NumICmpSelectPatternInst)
- return false;
- FoundInBlockUser = true;
-
- // Any reduction instr must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
- if (!ReduxDesc.IsReduction)
- return false;
+ // Process instructions only once (termination).
+ if (VisitedInsts.insert(Usr)) {
+ if (isa<PHINode>(Usr))
+ PHIs.push_back(Usr);
+ else
+ NonPHIs.push_back(Usr);
+ }
+ // Remember that we completed the cycle.
+ if (Usr == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
- if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
- isa<SelectInst>(U)))
- ++NumICmpSelectPatternInst;
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
- // Reductions of instructions such as Div, and Sub is only
- // possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
- !isa<ICmpInst>(U) && U->getOperand(0) != Iter)
- return false;
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
- Iter = ReduxDesc.PatternLastInst;
- }
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
- return false;
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
- // We found a reduction var if we have reached the original
- // phi node and we only have a single instruction with out-of-loop
- // users.
- if (FoundStartPHI) {
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
-
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.MinMaxKind);
- Reductions[Phi] = RD;
- // We've ended the cycle. This is a reduction variable if we have an
- // outside user and it has a binary op.
- return FoundBinOp && ExitInstruction;
- }
- }
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
+ Reductions[Phi] = RD;
+ // We've ended the cycle. This is a reduction variable if we have an
+ // outside user and it has a binary op.
- return false;
+ return true;
}
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) {
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
- assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
- ICmpInst *Cmp = 0;
+ Instruction *Cmp = 0;
SelectInst *Select = 0;
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
- if ((Cmp = dyn_cast<ICmpInst>(I))) {
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
return ReductionInstDesc(false, I);
return ReductionInstDesc(Select, Prev.MinMaxKind);
@@ -2990,13 +3349,14 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns
// Only handle single use cases for now.
if (!(Select = dyn_cast<SelectInst>(I)))
return ReductionInstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
return ReductionInstDesc(false, I);
if (!Cmp->hasOneUse())
return ReductionInstDesc(false, I);
- Value *CmpLeft = Cmp->getOperand(0);
- Value *CmpRight = Cmp->getOperand(1);
+ Value *CmpLeft;
+ Value *CmpRight;
// Look for a min/max pattern.
if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
@@ -3007,6 +3367,14 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns
return ReductionInstDesc(Select, MRK_SIntMax);
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
return ReductionInstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
return ReductionInstDesc(false, I);
}
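These matchers correspond to the usual source-level idioms; for example, the two loops below typically lower to the compare/select shapes recognized above as MRK_FloatMax and MRK_UIntMin (the float case assumes no-NaNs math):

static float loopMax(const float *A, unsigned N) {
  float Max = A[0];
  for (unsigned i = 1; i != N; ++i)
    Max = A[i] > Max ? A[i] : Max;   // fcmp ogt + select
  return Max;
}

static unsigned loopMin(const unsigned *A, unsigned N) {
  unsigned Min = A[0];
  for (unsigned i = 1; i != N; ++i)
    Min = A[i] < Min ? A[i] : Min;   // icmp ult + select
  return Min;
}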
@@ -3021,7 +3389,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
default:
return ReductionInstDesc(false, I);
case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+ Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
@@ -3039,9 +3408,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
- if (Kind != RK_IntegerMinMax)
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
return ReductionInstDesc(false, I);
return isMinMaxSelectCmpPattern(I, Prev);
}
@@ -3106,8 +3477,12 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- // We don't predicate loads/stores at the moment.
- if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ // We might be able to hoist the load.
+ if (it->mayReadFromMemory() && !LoadSpeculation.isHoistableLoad(it))
+ return false;
+
+ // We don't predicate stores at the moment.
+ if (it->mayWriteToMemory() || it->mayThrow())
return false;
// The instructions below can trap.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc30cc9..40e0098 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20,6 +20,7 @@
#include "VecUtils.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -47,7 +48,7 @@ namespace {
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
- typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+ typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap;
/// Pass identification, replacement for typeid
static char ID;
@@ -77,6 +78,8 @@ struct SLPVectorizer : public FunctionPass {
if (!DL)
return false;
+ DEBUG(dbgs()<<"SLP: Analyzing blocks in " << F.getName() << ".\n");
+
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
BasicBlock *BB = it;
bool BBChanged = false;
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
index 9b94366..21e6cdd 100644
--- a/lib/Transforms/Vectorize/VecUtils.cpp
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -46,7 +46,7 @@ namespace llvm {
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
- BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
+ Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
numberInstructions();
}
@@ -121,6 +121,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
if (Cost < CostThreshold) {
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
vectorizeTree(Operands, VF);
i += VF - 1;
Changed = true;
@@ -131,7 +132,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
}
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
- ValueSet Heads, Tails;
+ SetVector<Value*> Heads, Tails;
SmallDenseMap<Value*, Value*> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
@@ -152,7 +153,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
}
// For stores that start but don't end a link in the chain:
- for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
+ it != e; ++it) {
if (Tails.count(*it)) continue;
// We found a store instr that starts a chain. Now follow the chain and try
@@ -224,9 +226,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
}
void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+ int LastIdx = getLastIndex(Operands, Operands.size());
+ Instruction *Loc = getInsertionPoint(LastIdx);
+ Builder.SetInsertPoint(Loc);
+
+ assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
+ "Vectorizing with in-tree users");
+
Value *Vec = vectorizeTree(Operands, Operands.size());
- BasicBlock::iterator Loc = cast<Instruction>(Vec);
- IRBuilder<> Builder(++Loc);
// After vectorizing the operands we need to generate extractelement
// instructions and replace all of the uses of the scalar values with
// the values that we extracted from the vectorized tree.
@@ -243,6 +250,16 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
+ MustExtract.clear();
+
+ // Find the location of the last root.
+ int LastRootIndex = getLastIndex(VL, VL.size());
+ int FirstUserIndex = getFirstUserIndex(VL, VL.size());
+
+ // Don't vectorize if there are users of the tree roots inside the tree
+ // itself.
+ if (LastRootIndex > FirstUserIndex)
+ return max_cost;
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -250,7 +267,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
// Check that instructions with multiple users can be vectorized. Mark unsafe
// instructions.
- for (ValueSet::iterator it = MultiUserVals.begin(),
+ for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
e = MultiUserVals.end(); it != e; ++it) {
// Check that all of the users of this instr are within the tree
// and that they are all from the same lane.
@@ -258,15 +275,35 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
+ DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
+
+ // We don't have an ordering problem if the user is not in this basic
+ // block.
+ Instruction *Inst = cast<Instruction>(*I);
+ if (Inst->getParent() != BB) {
+ MustExtract.insert(*it);
+ continue;
+ }
+
+ // We don't have an ordering problem if the user is after the last root.
+ int Idx = InstrIdx[Inst];
+ if (Idx < LastRootIndex) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+ "because of an unsafe out of tree usage.\n");
+ break;
+ }
+
+ DEBUG(dbgs()<<"SLP: Adding to MustExtract "
+ "because of a safe out of tree usage.\n");
+ MustExtract.insert(*it);
+ continue;
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -311,14 +348,6 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (!I || Opcode != I->getOpcode()) return;
}
- // Mark instructions with multiple users.
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- Instruction *I = dyn_cast<Instruction>(VL[i]);
- // Remember to check if all of the users of this instr are vectorized
- // within our tree.
- if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
- }
-
for (int i = 0, e = VL.size(); i < e; ++i) {
// Check that the instruction is only used within
// one lane.
@@ -327,6 +356,19 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
LaneMap[VL[i]] = i;
}
+ // Mark instructions with multiple users.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Remember to check if all of the users of this instr are vectorized
+ // within our tree. At depth zero we have no local users, only external
+ // users that we don't care about.
+ if (Depth && I && I->getNumUses() > 1) {
+ DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
+ "because it has multiple users:" << *I << " \n");
+ MultiUserVals.insert(I);
+ }
+ }
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -440,11 +482,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Check if it is safe to sink the loads or the stores.
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
- int MaxIdx = InstrIdx[VL0];
- for (unsigned i = 1, e = VL.size(); i < e; ++i )
- MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-
+ int MaxIdx = getLastIndex(VL, VL.size());
Instruction *Last = InstrVec[MaxIdx];
+
for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
if (VL[i] == Last) continue;
Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
@@ -456,6 +496,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
+ // Calculate the extract cost.
+ unsigned ExternalUserExtractCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (MustExtract.count(VL[i]))
+ ExternalUserExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+
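The new cost term charges one extractelement for every bundled scalar that still has users outside the tree. Roughly (with a flat per-lane cost, where the code above asks TTI for the cost of each lane index):

#include <vector>

static int externalExtractCost(const std::vector<bool> &NeedsExtract,
                               int PerLaneExtractCost) {
  int Cost = 0;
  for (bool Needs : NeedsExtract)
    if (Needs)
      Cost += PerLaneExtractCost;   // one extract per externally used lane
  return Cost;
}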
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +516,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +557,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +587,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
+ return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +603,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
+ return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -564,15 +611,40 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
}
-Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
for (unsigned i = 0; i < VF; ++i )
MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
- return InstrVec[MaxIdx + 1];
+ return MaxIdx;
+}
+
+int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
+ // Find the first user of the values.
+ int FirstUser = InstrVec.size();
+ for (unsigned i = 0; i < VF; ++i) {
+ for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
+ U != UE; ++U) {
+ Instruction *Instr = dyn_cast<Instruction>(*U);
+ if (!Instr || Instr->getParent() != BB)
+ continue;
+
+ FirstUser = std::min(FirstUser, InstrIdx[Instr]);
+ }
+ }
+ return FirstUser;
+}
+
+int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
+ assert(I->getParent() == BB && "Invalid parent for instruction I");
+ assert(J->getParent() == BB && "Invalid parent for instruction J");
+ return std::max(InstrIdx[I],InstrIdx[J]);
+}
+
+Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
+ return InstrVec[Index + 1];
}
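The scheme behind these helpers is a simple block-local numbering: every instruction gets an increasing index, the last index of a bundle is the maximum over its members, and new vector code is placed right after that instruction. A compact model (the containers below are stand-ins for InstrVec/InstrIdx, not the real members, and instructions are reduced to integer ids):

#include <algorithm>
#include <cstddef>
#include <map>
#include <vector>

struct BlockNumbering {
  std::vector<int> Instrs;    // stand-in for InstrVec: ids in block order
  std::map<int, int> Index;   // stand-in for InstrIdx: id -> position

  void renumber() {
    Index.clear();
    for (int i = 0, e = (int)Instrs.size(); i != e; ++i)
      Index[Instrs[i]] = i;
  }
  int lastIndex(const std::vector<int> &Bundle) {
    int Max = 0;
    for (std::size_t i = 0; i != Bundle.size(); ++i)
      Max = std::max(Max, Index[Bundle[i]]);
    return Max;
  }
  int insertionPoint(int Idx) { return Instrs[Idx + 1]; }
};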
Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
- IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
Value *Vec = UndefValue::get(Ty);
for (unsigned i=0; i < Ty->getNumElements(); ++i) {
// Generate the 'InsertElement' instruction.
@@ -583,15 +655,51 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
GatherInstructions.push_back(Vec);
}
+ for (unsigned i = 0; i < Ty->getNumElements(); ++i)
+ VectorizedValues[VL[i]] = Vec;
+
return Vec;
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
+
+ int LastInstrIdx = getLastIndex(VL, VL.size());
+ for (SetVector<Value*>::iterator it = MustExtract.begin(),
+ e = MustExtract.end(); it != e; ++it) {
+ Instruction *I = cast<Instruction>(*it);
+
+ // This is a scalarized value, so we can use the original value.
+ // No need to extract from the vector.
+ if (!LaneMap.count(I))
+ continue;
+
+ Value *Vec = VectorizedValues[I];
+ // We decided not to vectorize I because one of its users was not
+    // vectorized. This is okay.
+ if (!Vec)
+ continue;
+
+ Value *Idx = Builder.getInt32(LaneMap[I]);
+ Value *Extract = Builder.CreateExtractElement(Vec, Idx);
+ bool Replaced = false;
+ for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
+ ++U) {
+ Instruction *UI = cast<Instruction>(*U);
+      if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx) {
+        UI->replaceUsesOfWith(I, Extract);
+        Replaced = true;
+      }
+ }
+ assert(Replaced && "Must replace at least one outside user");
+ (void)Replaced;
+ }
+
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
+ MustExtract.clear();
+ VectorizedValues.clear();
return V;
}
@@ -614,19 +722,27 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
}
// Check that this is a simple vector constant.
- if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+ if (AllConst || AllSameScalar)
+ return Scalarize(VL, VecTy);
// Scalarize unknown structures.
Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
- if (!VL0) return Scalarize(VL, VecTy);
+ if (!VL0)
+ return Scalarize(VL, VecTy);
- if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+ if (VectorizedValues.count(VL0)) {
+ Value * Vec = VectorizedValues[VL0];
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = Vec;
+ return Vec;
+ }
unsigned Opcode = VL0->getOpcode();
for (unsigned i = 0, e = VF; i < e; ++i) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
// If not all of the instructions are identical then we have to scalarize.
- if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ if (!I || Opcode != I->getOpcode())
+ return Scalarize(VL, VecTy);
}
switch (Opcode) {
@@ -646,10 +762,12 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
for (int i = 0; i < VF; ++i)
INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
Value *InVec = vectorizeTree_rec(INVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
CastInst *CI = dyn_cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- VectorizedValues[VL0] = V;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Add:
@@ -672,16 +790,18 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
for (int i = 0; i < VF; ++i) {
- RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
- LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
}
- Value *RHS = vectorizeTree_rec(RHSVL, VF);
Value *LHS = vectorizeTree_rec(LHSVL, VF);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *RHS = vectorizeTree_rec(RHSVL, VF);
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
- VectorizedValues[VL0] = V;
+    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = V;
+
return V;
}
case Instruction::Load: {
@@ -693,12 +813,18 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
if (!isConsecutiveAccess(VL[i-1], VL[i]))
return Scalarize(VL, VecTy);
- IRBuilder<> Builder(GetLastInstr(VL, VF));
- Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
- VecTy->getPointerTo());
- LI = Builder.CreateLoad(VecPtr);
+ // Loads are inserted at the head of the tree because we don't want to sink
+ // them all the way down past store instructions.
+ Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
+ IRBuilder<> LoadBuilder(Loc);
+ Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = LoadBuilder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
- VectorizedValues[VL0] = LI;
+
+ for (int i = 0; i < VF; ++i)
+ VectorizedValues[VL[i]] = LI;
+
return LI;
}
case Instruction::Store: {
@@ -710,8 +836,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
Value *VecValue = vectorizeTree_rec(ValueOp, VF);
-
- IRBuilder<> Builder(GetLastInstr(VL, VF));
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
VecTy->getPointerTo());
Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
@@ -721,9 +845,7 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
return 0;
}
default:
- Value *S = Scalarize(VL, VecTy);
- VectorizedValues[VL0] = S;
- return S;
+ return Scalarize(VL, VecTy);
}
}
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
index 5456c6c..d41d2ed 100644
--- a/lib/Transforms/Vectorize/VecUtils.h
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -16,9 +16,11 @@
#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/IRBuilder.h"
#include <vector>
namespace llvm {
@@ -107,9 +109,19 @@ private:
/// \returns the pointer to the barrier instruction if we can't sink.
Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
- /// \returns the instruction that appears last in the BB from \p VL.
+  /// \returns the index of the last instruction in the BB from \p VL.
/// Only consider the first \p VF elements.
- Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+ int getLastIndex(ArrayRef<Value *> VL, unsigned VF);
+
+ /// \returns the index of the first User of \p VL.
+ /// Only consider the first \p VF elements.
+ int getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns the index of \p I or \p J, whichever appears last in the BB.
+ int getLastIndex(Instruction *I, Instruction *J);
+
+ /// \returns the insertion point for \p Index.
+ Instruction *getInsertionPoint(unsigned Index);
/// \returns a vector from a collection of scalars in \p VL.
Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
@@ -127,15 +139,20 @@ private:
/// NOTICE: The vectorization methods also use this set.
ValueSet MustScalarize;
+ /// Contains values that have users outside of the vectorized graph.
+ /// We need to generate extract instructions for these values.
+ /// NOTICE: The vectorization methods also use this set.
+ SetVector<Value*> MustExtract;
+
/// Contains a list of values that are used outside the current tree. This
/// set must be reset between runs.
- ValueSet MultiUserVals;
+ SetVector<Value*> MultiUserVals;
/// Maps values in the tree to the vector lanes that uses them. This map must
/// be reset between runs of getCost.
std::map<Value*, int> LaneMap;
/// A list of instructions to ignore while sinking
/// memory instructions. This map must be reset between runs of getCost.
- SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+ ValueSet MemBarrierIgnoreList;
// -- Containers that are used during vectorizeTree -- //
@@ -150,6 +167,9 @@ private:
/// Iterating over this list is faster than calling LICM.
ValueList GatherInstructions;
+ /// Instruction builder to construct the vectorized tree.
+ IRBuilder<> Builder;
+
// Analysis and block reference.
BasicBlock *BB;
ScalarEvolution *SE;