aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt11
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp33
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp3
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h6
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp28
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp30
-rw-r--r--lib/Transforms/InstCombine/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp50
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt7
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp139
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt1
-rw-r--r--lib/Transforms/LLVMBuild.txt4
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt9
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp11
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp4
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp7
-rw-r--r--lib/Transforms/Scalar/GVN.cpp9
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp44
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp16
-rw-r--r--lib/Transforms/Scalar/LICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp148
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp104
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp4
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp162
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp27
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp2
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp5
-rw-r--r--lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp15
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt9
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp62
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Utils/Local.cpp38
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp64
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp16
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
53 files changed, 1185 insertions, 485 deletions
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 8fa66fc..58b3551 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,14 +20,3 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
-
-add_llvm_library_dependencies(LLVMipo
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- LLVMipa
- )
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c57e9fc..2e869e6 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@ namespace {
struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -84,7 +86,10 @@ namespace {
}
char GlobalOpt::ID = 0;
-INITIALIZE_PASS(GlobalOpt, "globalopt",
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -345,6 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
@@ -828,6 +834,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
@@ -1931,7 +1938,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
TargetData *TD = getAnalysisIfAvailable<TargetData>();
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2304,7 +2312,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
DenseMap<Constant*, Constant*> &MutatedMemory,
std::vector<GlobalVariable*> &AllocaTmps,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
@@ -2461,7 +2470,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals)) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
} else {
return false;
@@ -2473,7 +2482,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ MutatedMemory, AllocaTmps, SimpleConstants, TD,
+ TLI))
return false;
InstResult = RetVal;
}
@@ -2535,7 +2545,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD);
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
Values[CurInst] = InstResult;
}
@@ -2547,7 +2557,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
/// MutatedMemory - For each store we execute, we update this map. Loads
/// check this to get the most up-to-date value. If evaluation is successful,
/// this state is committed to the process.
@@ -2572,7 +2583,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
bool EvalSuccess = EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>(), CallStack,
MutatedMemory, AllocaTmps,
- SimpleConstants, TD);
+ SimpleConstants, TD, TLI);
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
@@ -2601,8 +2612,6 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
return EvalSuccess;
}
-
-
/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
@@ -2611,6 +2620,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (Ctors.empty()) return false;
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2628,7 +2639,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD)) {
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 884faca..b358fab 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPO
parent = Transforms
library_name = ipo
required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils
-
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8fdfd72..f63f532 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -21,7 +21,6 @@
#include "llvm/DefaultPasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
@@ -101,6 +100,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -340,4 +340,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
PassManagerBase *LPM = unwrap(PM);
Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
}
-
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a46d5ad..d070ccc 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-add_llvm_library_dependencies(LLVMInstCombine
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 3808278..464e9d0 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -22,6 +22,7 @@
namespace llvm {
class CallSite;
class TargetData;
+ class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
@@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
+ TargetLibraryInfo *TLI;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -92,9 +94,11 @@ public:
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
TargetData *getTargetData() const { return TD; }
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e8136ab..27c7c54 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return 0;
Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -960,7 +962,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 2 : 0);
+ int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f10e48a..46e4acd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-
-
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
@@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
return C;
}
@@ -528,9 +527,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, In);
}
-
-
-
+
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X == 1) to i32 --> X iff X has only the low bit set.
@@ -1213,10 +1210,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() &&
- Call->getCalledFunction()->getName() == "sqrt" &&
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb1cbfa..144b92b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -284,7 +284,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD);
+ CompareRHS, TD, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -1657,6 +1657,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
@@ -1787,6 +1795,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // comparing -val or val with non-zero is the same as just comparing val
+ // ie, abs(val) != 0 -> val != 0
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 91e60a4..f1ea8ea 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -282,7 +282,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -294,17 +295,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -330,7 +333,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD);
+ ConstOps, TD, TLI);
}
}
@@ -479,18 +482,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -679,6 +682,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
// Selecting between two integer constants?
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 6d85add..702e0f2 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ IC.getTargetLibraryInfo());
return V;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index a7a6311..af065cd 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
}
char InstCombiner::ID = 0;
-INITIALIZE_PASS(InstCombiner, "instcombine",
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
@@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
MadeChange = true;
}
- if ((*I)->getType() != IntPtrTy) {
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -909,7 +915,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+ // We do not handle pointer-vector geps here
+ if (!StrippedPtr)
+ return 0;
+
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1798,7 +1808,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
@@ -1825,7 +1836,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -1843,7 +1854,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD);
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -1909,7 +1920,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -1954,7 +1966,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2062,7 +2074,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
-
+ TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt
index b73c303..62c6161 100644
--- a/lib/Transforms/InstCombine/LLVMBuild.txt
+++ b/lib/Transforms/InstCombine/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = InstCombine
parent = Transforms
required_libraries = Analysis Core Support Target TransformUtils
-
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b617539..4cc5727 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -55,8 +55,11 @@ static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -434,6 +437,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> IRB1(CheckTerm);
Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
Crash->setDebugLoc(OrigIns->getDebugLoc());
+ ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
}
// This function replaces all global variables with new variables that have
@@ -517,7 +521,11 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+ SmallString<2048> DescriptionOfGlobal = G->getName();
+ DescriptionOfGlobal += " (";
+ DescriptionOfGlobal += M.getModuleIdentifier();
+ DescriptionOfGlobal += ")";
+ GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -558,6 +566,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
+ // We also need to unregister globals at the end, e.g. when a shared library
+ // gets closed.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
DEBUG(dbgs() << M);
return true;
}
@@ -631,7 +655,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
Res |= handleFunction(M, *F);
}
- appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/);
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
return Res;
}
@@ -951,8 +975,8 @@ BlackList::BlackList(const std::string &Path) {
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
- errs() << EC.message();
- exit(1);
+ report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
+ EC.message());
}
MemoryBuffer *Buff = File.take();
const char *Data = Buff->getBufferStart();
@@ -962,15 +986,23 @@ BlackList::BlackList(const std::string &Path) {
for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
if (Lines[i].startswith(kFunPrefix)) {
std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- if (Fun.size()) {
- Fun += "|";
- }
+ std::string ThisFuncRE;
// add ThisFunc replacing * with .*
for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
if (ThisFunc[j] == '*')
- Fun += '.';
- Fun += ThisFunc[j];
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
}
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
}
}
if (Fun.size()) {
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 929b7cd..a4a1fef 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -7,10 +7,3 @@ add_llvm_library(LLVMInstrumentation
PathProfiling.cpp
ProfilingUtils.cpp
)
-
-add_llvm_library_dependencies(LLVMInstrumentation
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ccf7e11..96e5d5b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -43,12 +43,14 @@ namespace {
public:
static char ID;
GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+ UseExtraChecksum(false) {
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+ bool useExtraChecksum = false)
: ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format) {
+ Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
@@ -94,6 +96,7 @@ namespace {
bool EmitNotes;
bool EmitData;
bool Use402Format;
+ bool UseExtraChecksum;
Module *M;
LLVMContext *Ctx;
@@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+ bool Use402Format,
+ bool UseExtraChecksum) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
}
namespace {
@@ -167,7 +171,7 @@ namespace {
}
uint32_t length() {
- // Here 2 = 1 for string lenght + 1 for '0' id#.
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -244,10 +248,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os,
+ bool Use402Format, bool UseExtraChecksum) {
this->os = os;
Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
@@ -257,14 +263,14 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (!Use402Format)
- ++BlockLen; // For second checksum.
+ if (UseExtraChecksum)
+ ++BlockLen;
write(BlockLen);
uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
- write(0); // checksum #1
- if (!Use402Format)
- write(0); // checksum #2
+ write(0); // lineno checksum
+ if (UseExtraChecksum)
+ write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
@@ -290,6 +296,7 @@ namespace {
for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
write(0); // No flags on our blocks.
}
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
@@ -301,6 +308,8 @@ namespace {
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
write(Block.OutEdges[i]->Number);
write(0); // no flags
}
@@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
void GCOVProfiler::emitGCNO() {
- DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(CU_Nodes->getOperand(i));
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*204MVLL", 12);
-
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
- raw_fd_ostream *&os = GcnoFiles[CU];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out.write("oncg*404MVLL", 12);
+ else
+ out.write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
}
- Func.writeOut();
}
+ Func.writeOut();
}
- }
-
- for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
- I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
- raw_fd_ostream *&out = I->second;
- out->write("\0\0\0\0\0\0\0\0", 8); // EOF
- out->close();
- delete out;
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
}
}
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
index f302d03..d36ad54 100644
--- a/lib/Transforms/Instrumentation/LLVMBuild.txt
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Instrumentation
parent = Transforms
required_libraries = Analysis Core Support TransformUtils
-
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index d36b898..b2ef49a 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = IPO InstCombine Instrumentation Scalar Utils
+
[component_0]
type = Group
name = Transforms
parent = Libraries
-
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a6f0cf3..d660c72 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b2..f9abfe9 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -69,6 +70,7 @@ namespace {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +99,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +119,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
WeakVH IterHandle(CurInstIterator);
ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6..5430f62 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f5688cb..8729019 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// writes to addresses which will definitely be overwritten later
if (LaterOff > EarlierOff &&
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
// Otherwise, they don't completely overlap.
@@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
BasicBlock *BB, DominatorTree *DT) {
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
TerminatorInst *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2..5241e11 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -215,6 +216,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -263,6 +265,7 @@ private:
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a51cbb6..374fdd7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
@@ -446,7 +447,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +532,7 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 0772b48..ad8689a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
+ unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1f21108..6d52b22 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+static cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+ // When creating this AddExpr, don't apply the current operations NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
@@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
@@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af3..c78db3f 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8098b36..8795cd8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load")))
+ if (LI->getMetadata("invariant.load"))
return true;
// Don't hoist loads which have may-aliased stores in loop.
@@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index 027634d..cee9119 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = Scalar
parent = Transforms
library_name = ScalarOpts
required_libraries = Analysis Core InstCombine Support Target TransformUtils
-
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c..f0f05e6 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ae51d5..840614e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,17 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
+static cl::opt<bool> EnableNested(
"enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+static cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
namespace {
@@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +716,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +730,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg,
// on other loops, and cannot be expected to change sibling loops. If the
// AddRec exists, consider it's register free and leave it alone. Otherwise,
// do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // For !EnableNested, never rewrite IVs in other loops.
if (!EnableNested) {
Loose();
return;
@@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1389,7 +1405,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1450,6 +1465,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (EnableNested || AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 37f4c2c..22dbfe3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default is different
+ // for run-time or compile-time trip count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = CurrentThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > CurrentThreshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c..a2d0e98 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -71,7 +71,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
+
+ // FIXME: Consider custom class for this.
+ std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@@ -117,7 +119,15 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ // We need to forget about all switches in the current loop.
+ // FIXME: Do it better than enumerating all blocks of code
+ // and see if it is a switch instruction.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+ if (SI)
+ UnswitchedVals.erase(SI);
+ }
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
+
+ /// For new loop switches we clone info about values that was
+ /// already unswitched and has redundant successors.
+ /// Note, that new loop data is stored inside the VMap.
+ void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases > 1) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
+/// For new loop switches we clone info about values that was
+/// already unswitched and has redundant successors.
+/// Not that new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB) {
+
+ const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+ if (SI && UnswitchedVals.count(SI)) {
+ // Don't clone a totally simplified switch.
+ if (isa<Constant>(SI->getCondition()))
+ return;
+ Value* I = VMap.lookup(SI);
+ assert(I && "All instructions that are in SrcBB must be in VMap.");
+ UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+ }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = SI->getCaseValue(i);
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals are looks trivial too.
+ if (UnswitchedVals[SI].count(CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
+
+ unsigned NumUnswitched =
+ (NumSwitches + NumBranches) + 1 /*take in account current iteration*/;
+
+ unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+ unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+ if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ // Inherit simplified switches info for NewBB
+ // We needn't pass NewBB since its instructions are already contained
+ // inside the VMap.
+ CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
+
+ UnswitchedVals[SI].insert(Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+ // It seemes intuitive to exclude function pointer types as well, since
+ // functions are never reference-counted, however clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
/// Partial - True of we've seen an opportunity for partial RR elimination,
/// such as pushing calls into a CFG triangle or into one side of a
/// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
bool Partial;
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1488,7 +1484,7 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
- /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
/// metadata.
unsigned CopyOnEscapeMDKind;
@@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
@@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
}
}
@@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
@@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
// Be optimistic here; it's CheckForCFGHazards' job detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I == BBStates.end() || I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
return NestingDetected;
}
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ succ_iterator End = succ_end(SuccStack.back().first);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
+ }
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
+ }
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ SmallVector<BasicBlock *, 4> Exits;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *BB = I;
if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ Exits.push_back(BB);
}
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
+ continue;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
}
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f6762ad..e4cb55c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -156,6 +157,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1125,7 +1128,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
@@ -1517,6 +1520,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1641,6 +1649,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1650,7 +1661,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b14efc..bc70c51 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6e169de..f3184ec 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimisation is only valid, if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c..ef65c0a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb..d831452 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd..ef4a473 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c052910..f752d79 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6d5432d..d96f59c 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
@@ -28,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index dd4a659..f89e1b1 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality)
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should become go away once the new EH system
+ // is in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
index dea7b02..88b2ffe 100644
--- a/lib/Transforms/Utils/LLVMBuild.txt
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = TransformUtils
parent = Transforms
required_libraries = Analysis Core IPA Support Target
-
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 134ab71..4dd93cf 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -494,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -520,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -748,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8..4376265 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader (this excludes
+ // loop headers which are targeted by an indirectbr).
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SmallVector<BasicBlock*, 8> OuterLoopPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
-
+ !L->contains(PN->getIncomingBlock(i)))
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
- }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
@@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa2..b96f14b 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000..b351852
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generate a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI name is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+ Value *OrigVal = PN->getIncomingValueForBlock(Latch);
+ Value *V = OrigVal;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the orignal loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the inital values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is total number
+/// of loop bodes in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 1)
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction var simplification
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item)
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in
+ // parent loop, so the Scalar Evolution pass needs to be run again
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to splits the original preheader twice, one for the end of the
+ // prolog code and one for a new loop preheader
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % (loop unroll factor + 1)
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // Check if for no extra iterations, then jump to unrolled loop
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but we don't clone the loop
+ // This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into function just before the original loop
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-thru case when for the last
+ // left over iteration
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // The comparison w/ the extra iteration value and branch
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the clone is created.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 5e294a3..8491c55 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -19,7 +19,8 @@
using namespace llvm;
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
StructType *Ty = StructType::get(
@@ -31,7 +32,7 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) {
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -51,6 +52,13 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Create the new global variable and replace all uses of
// the old global variable with the new one.
(void)new GlobalVariable(M, NewInit->getType(), false,
- GlobalValue::AppendingLinkage, NewInit,
- "llvm.global_ctors");
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4..bf2cb49 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
unsigned Cost = 0;
@@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f9..81eb9e0 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.