Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt           |   9
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp       |  11
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp         |  13
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp |   4
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp             |   7
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                  |   9
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp          |   2
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp       |  44
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp        |  16
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                 |  17
-rw-r--r--  lib/Transforms/Scalar/LLVMBuild.txt            |   1
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp     |   6
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp   | 148
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp       |  57
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp         | 104
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp      |   4
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp              | 162
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                 |  27
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp |   2
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp     |   5
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp                 |   3
21 files changed, 446 insertions(+), 205 deletions(-)
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a6f0cf3..d660c72 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b2..f9abfe9 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -69,6 +70,7 @@ namespace {
/// TLI - Keep a pointer to a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +99,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +119,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
WeakVH IterHandle(CurInstIterator);
ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
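
The hunks above show the three-step pattern that recurs through most files in this patch: declare the dependency in getAnalysisUsage, register it with the INITIALIZE_PASS macros, and fetch it in runOnFunction. A minimal sketch of that pattern, assuming the LLVM 3.0-era pass API; MyPass is a hypothetical pass name:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    namespace {
      struct MyPass : public FunctionPass {   // hypothetical pass
        static char ID;
        const TargetLibraryInfo *TLInfo;
        MyPass() : FunctionPass(ID) {}

        // Step 1: declare the dependency so the pass manager schedules
        // TargetLibraryInfo ahead of this pass.
        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<TargetLibraryInfo>();
        }

        virtual bool runOnFunction(Function &F) {
          // Step 3: a required analysis is fetched with getAnalysis<> and is
          // never null, unlike optional ones via getAnalysisIfAvailable<>.
          TLInfo = &getAnalysis<TargetLibraryInfo>();
          return false;
        }
      };
    }
    char MyPass::ID = 0;
    // Step 2: INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY /
    // INITIALIZE_PASS_END record the dependency at registration time,
    // exactly as the CodeGenPrepare hunk above does.
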
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6..5430f62 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
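
The surrounding hunk is the classic worklist loop: when an instruction folds to a constant, its users are re-enqueued because they may now fold too. A self-contained sketch of that control flow, with plain integers standing in for instructions (illustrative only, not the pass's types):

    #include <map>
    #include <set>
    #include <vector>

    // Folding one item re-enqueues its users, mirroring the loop in
    // ConstantPropagation::runOnFunction above.
    static void propagate(const std::map<int, std::vector<int> > &Users,
                          std::set<int> &WorkList, bool (*tryFold)(int)) {
      while (!WorkList.empty()) {
        int I = *WorkList.begin();
        WorkList.erase(WorkList.begin());          // pop one element
        std::map<int, std::vector<int> >::const_iterator U = Users.find(I);
        if (tryFold(I) && U != Users.end())
          // These might be constant-propagatable now; process them again.
          WorkList.insert(U->second.begin(), U->second.end());
      }
    }
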
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f5688cb..8729019 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// writes to addresses which will definitely be overwritten later
if (LaterOff > EarlierOff &&
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
// Otherwise, they don't completely overlap.
@@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
BasicBlock *BB, DominatorTree *DT) {
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
TerminatorInst *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2..5241e11 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -215,6 +216,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -263,6 +265,7 @@ private:
// This transformation requires dominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a51cbb6..374fdd7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
@@ -446,7 +447,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +532,7 @@ namespace {
// This transformation requires dominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 0772b48..ad8689a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
+ unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1f21108..6d52b22 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+static cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+ // When creating this AddExpr, don't apply the current operations NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
@@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
@@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
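
The new Processed set bounds the recursion: in this era's API, SmallPtrSet::insert returns false when the element is already present, so a SCEV subexpression shared along several paths is rated once rather than once per path. A self-contained sketch of the same memoized walk over a small expression DAG (Node is a stand-in, not a SCEV type):

    #include "llvm/ADT/SmallPtrSet.h"

    struct Node { const Node *L, *R; };  // stand-in for a SCEV expression DAG

    static bool isExpensive(const Node *N,
                            llvm::SmallPtrSet<const Node *, 8> &Processed) {
      if (!N)
        return false;
      if (!Processed.insert(N))          // already rated via another path
        return false;
      if (!N->L && !N->R)                // leaves are cheap in this sketch
        return false;
      return isExpensive(N->L, Processed) || isExpensive(N->R, Processed);
    }
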
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af3..c78db3f 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
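
The SplitBlockPredecessors changes in this file (and in LoopUnswitch below) track an API migration from an explicit pointer-plus-length pair to a single container parameter; a SmallVector now converts implicitly. A sketch of the updated call shape, assuming the post-change ArrayRef-taking signature; splitPreds is a hypothetical helper:

    #include "llvm/Pass.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    static BasicBlock *splitPreds(BasicBlock *BB,
                                  SmallVectorImpl<BasicBlock*> &Preds,
                                  Pass *P) {
      // Old form spelled out the buffer and count:
      //   SplitBlockPredecessors(BB, &Preds[0], Preds.size(), ".suffix", P);
      // New form passes the vector whole, as in the hunks above.
      return SplitBlockPredecessors(BB, Preds, ".thr_comm", P);
    }
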
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8098b36..8795cd8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load")))
+ if (LI->getMetadata("invariant.load"))
return true;
// Don't hoist loads which have may-aliased stores in loop.
@@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index 027634d..cee9119 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = Scalar
parent = Transforms
library_name = ScalarOpts
required_libraries = Analysis Core InstCombine Support Target TransformUtils
-
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c..f0f05e6 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ae51d5..840614e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,17 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
+static cl::opt<bool> EnableNested(
"enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+static cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
namespace {
@@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +716,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +730,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg,
// on other loops, and cannot be expected to change sibling loops. If the
// AddRec exists, consider its register free and leave it alone. Otherwise,
// do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // For !EnableNested, never rewrite IVs in other loops.
if (!EnableNested) {
Loose();
return;
@@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1389,7 +1405,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1450,6 +1465,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (EnableNested || AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately; otherwise heuristics could choose them over others, leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
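
A condensed sketch of the LoserRegs mechanism threaded through RateFormula above: a register that once made a formula an instant loser is remembered, and any later formula touching it is rejected without being re-rated. The names mirror the patch but the body is illustrative; unlike the hunk above, the sketch also null-checks LoserRegs before inserting:

    #include "llvm/ADT/SmallPtrSet.h"

    struct Cost {
      bool Lost;
      Cost() : Lost(false) {}
      void Loose() { Lost = true; }
      bool isLoser() const { return Lost; }

      void RatePrimaryRegister(const void *Reg,
                               llvm::SmallPtrSet<const void *, 16> &Regs,
                               llvm::SmallPtrSet<const void *, 16> *LoserRegs) {
        // A register already known to be a loser sinks the formula at once.
        if (LoserRegs && LoserRegs->count(Reg)) {
          Loose();
          return;
        }
        if (Regs.insert(Reg)) {
          // ... RateRegister(Reg, ...) would run here ...
          if (isLoser() && LoserRegs)
            LoserRegs->insert(Reg);      // remember it for later formulae
        }
      }
    };
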
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 37f4c2c..22dbfe3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default differs between
+ // run-time and compile-time trip-count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = CurrentThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > CurrentThreshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
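
A worked example of the power-of-two reduction above, with hypothetical sizes:

    // A 20-instruction body against a threshold of 150, starting from the
    // UnrollRuntimeCount default of 8.
    static unsigned pickRuntimeCount() {
      unsigned LoopSize = 20, CurrentThreshold = 150;
      unsigned Count = 8;
      unsigned Size = LoopSize * Count;          // 160 > 150: too large
      while (Count != 0 && Size > CurrentThreshold) {
        Count >>= 1;                             // 8 -> 4
        Size = LoopSize * Count;                 // 80 <= 150: fits
      }
      return Count;                              // 4: unroll by a power of two
    }
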
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c..a2d0e98 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -71,7 +71,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
+
+ // FIXME: Consider custom class for this.
+ std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@@ -117,7 +119,15 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ // We need to forget about all switches in the current loop.
+ // FIXME: Do it better than enumerating all blocks of code
+ // and checking whether each terminator is a switch instruction.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+ if (SI)
+ UnswitchedVals.erase(SI);
+ }
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
+
+ /// For new loop switches we clone info about values that were
+ /// already unswitched and have redundant successors.
+ /// Note that the new loop data is stored inside the VMap.
+ void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases > 1) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
+/// For new loop switches we clone info about values that were
+/// already unswitched and have redundant successors.
+/// Note that the new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB) {
+
+ const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+ if (SI && UnswitchedVals.count(SI)) {
+ // Don't clone a totally simplified switch.
+ if (isa<Constant>(SI->getCondition()))
+ return;
+ Value* I = VMap.lookup(SI);
+ assert(I && "All instructions that are in SrcBB must be in VMap.");
+ UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+ }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = SI->getCaseValue(i);
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals look trivial too.
+ if (UnswitchedVals[SI].count(CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
+
+ unsigned NumUnswitched =
+ (NumSwitches + NumBranches) + 1 /*take into account the current iteration*/;
+
+ unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+ unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+ if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ // Inherit simplified switches info for NewBB
+ // We needn't pass NewBB since its instructions are already contained
+ // inside the VMap.
+ CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
+
+ UnswitchedVals[SI].insert(Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
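
The RewriteLoopBodyWithConditionConstant hunk above is an iterator-invalidation fix: replaceUsesOfWith() mutates the very use list being walked, so uses are first collected and only rewritten after the walk finishes. A restatement of that pattern as a free function (replaceInLoop is a hypothetical name; same shape as the patched code):

    #include "llvm/Instruction.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include <vector>
    using namespace llvm;

    static void replaceInLoop(Value *LIC, Value *Replacement, Loop *L) {
      // Collect first: pushing onto Worklist leaves LIC's use list untouched.
      std::vector<Instruction*> Worklist;
      for (Value::use_iterator UI = LIC->use_begin(), UE = LIC->use_end();
           UI != UE; ++UI) {
        Instruction *U = dyn_cast<Instruction>(*UI);
        if (U && L->contains(U))
          Worklist.push_back(U);
      }
      // Rewrite second: no use_iterator is live while the use list changes.
      for (std::vector<Instruction*>::iterator I = Worklist.begin(),
             E = Worklist.end(); I != E; ++I)
        (*I)->replaceUsesOfWith(LIC, Replacement);
    }
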
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
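
With the lowered thresholds, runs like the following can now be merged into a memset; previously eight stores or 64 bytes were required before the transformation was considered profitable. An illustrative source-level view:

    // Four adjacent byte stores now satisfy "TheStores.size() >= 4" and may
    // become a single memset; before this change they were left alone.
    static void zero4(unsigned char *p) {
      p[0] = 0;
      p[1] = 0;
      p[2] = 0;
      p[3] = 0;   // -> memset(p, 0, 4) once the range is deemed profitable
    }
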
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never reference-counted; however, clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
/// Partial - True if we've seen an opportunity for partial RR elimination,
/// such as pushing calls into a CFG triangle or into one side of a
/// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
bool Partial;
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1488,7 +1484,7 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
- /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
/// metadata.
unsigned CopyOnEscapeMDKind;
@@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
@@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
}
}
@@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
@@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
// Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I == BBStates.end() || I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
return NestingDetected;
}
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ succ_iterator End = succ_end(SuccStack.back().first);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
+ }
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
+ }
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ SmallVector<BasicBlock *, 4> Exits;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *BB = I;
if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ Exits.push_back(BB);
}
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
+ continue;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
}
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
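
ComputePostOrders above runs two explicit-stack DFS walks: forward over the CFG for the top-down order, and backward from every exit for the bottom-up order, with recorded backedges skipped on the reverse walk. A self-contained sketch of the forward half on a plain adjacency-list graph (node 0 is the entry); visiting the result in reverse gives the reverse-postorder the pass iterates:

    #include <set>
    #include <utility>
    #include <vector>

    typedef std::vector<std::vector<int> > Graph;

    static void postOrder(const Graph &G, std::vector<int> &Order) {
      std::set<int> Visited;
      // Each stack entry: (node, index of the next successor to try).
      std::vector<std::pair<int, size_t> > Stack;
      Stack.push_back(std::make_pair(0, 0));
      Visited.insert(0);
      while (!Stack.empty()) {
        int BB = Stack.back().first;
        size_t &Next = Stack.back().second;
        if (Next < G[BB].size()) {
          int Succ = G[BB][Next++];
          if (Visited.insert(Succ).second)
            Stack.push_back(std::make_pair(Succ, 0));  // descend
        } else {
          Order.push_back(BB);   // all successors finished: emit postorder
          Stack.pop_back();
        }
      }
    }
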
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f6762ad..e4cb55c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -156,6 +157,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1125,7 +1128,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
@@ -1517,6 +1520,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1641,6 +1649,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1650,7 +1661,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b14efc..bc70c51 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6e169de..f3184ec 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimization is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
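
The new CI->use_empty() guard matters because the transform substitutes the constant 1 for the fwrite result, while fwrite(S,1,1,F) actually returns 0 when the write fails; restricting the rewrite to calls whose result is discarded sidesteps that mismatch. An illustrative source-level view:

    #include <cstdio>

    static void demo(const char *s, std::FILE *f) {
      std::fwrite(s, 1, 1, f);                  // result unused: may become fputc(s[0], f)
      std::size_t n = std::fwrite(s, 1, 1, f);  // result used: must stay fwrite
      (void)n;
    }
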
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c..ef65c0a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}