Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                       33
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                          4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp         6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp    10
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp    15
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp    11
-rw-r--r--  lib/Transforms/Instrumentation/BlackList.cpp           102
-rw-r--r--  lib/Transforms/Instrumentation/BlackList.h              55
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp        6
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt            2
-rw-r--r--  lib/Transforms/Instrumentation/FunctionBlackList.cpp    79
-rw-r--r--  lib/Transforms/Instrumentation/FunctionBlackList.h      37
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp        11
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp      12
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp                46
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp                            8
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp          36
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp                       2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                           26
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp                17
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp                  2
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                          39
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp            24
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp               2
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp                  65
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp            14
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp                       91
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp               52
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp             154
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp                 2
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp                 4
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp            253
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt                      1
-rw-r--r--  lib/Transforms/Utils/Local.cpp                          36
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                   777
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp            2
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp                 2
37 files changed, 1498 insertions, 540 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 6d950d2..b888e95 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -346,7 +346,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
-static bool IsSafeComputationToRemove(Value *V) {
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
do {
if (isa<Constant>(V))
return true;
@@ -355,7 +355,7 @@ static bool IsSafeComputationToRemove(Value *V) {
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
- if (isAllocationFn(V))
+ if (isAllocationFn(V, TLI))
return true;
Instruction *I = cast<Instruction>(V);
@@ -376,7 +376,8 @@ static bool IsSafeComputationToRemove(Value *V) {
/// of the global and clean up any that obviously don't assign the global a
/// value that isn't dynamically allocated.
///
-static bool CleanupPointerRootUsers(GlobalVariable *GV) {
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+ const TargetLibraryInfo *TLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to whitelist the
@@ -432,18 +433,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
- CleanupPointerRootUsers(GV);
+ CleanupPointerRootUsers(GV, TLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
- if (IsSafeComputationToRemove(Dead[i].first)) {
+ if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I))
+ if (isAllocationFn(I, TLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
@@ -975,7 +976,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
- Changed |= CleanupPointerRootUsers(GV);
+ Changed |= CleanupPointerRootUsers(GV, TLI);
} else {
Changed = true;
CleanupConstantGlobalUsers(GV, 0, TD, TLI);
@@ -1465,9 +1466,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, TargetData *TD) {
+ Value *NElems, TargetData *TD,
+ const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
- Type *MAT = getMallocAllocatedType(CI);
+ Type *MAT = getMallocAllocatedType(CI, TLI);
StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
@@ -1688,7 +1690,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- Value *NElems = getMallocArraySize(CI, TD, true);
+ Value *NElems = getMallocArraySize(CI, TD, TLI, true);
if (!NElems)
return false;
@@ -1725,7 +1727,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
- if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
@@ -1742,7 +1744,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
+ TD, TLI);
return true;
}
@@ -1771,8 +1774,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
return true;
- } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- Type *MallocType = getMallocAllocatedType(CI);
+ } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+ Type *MallocType = getMallocAllocatedType(CI, TLI);
if (MallocType &&
TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
TD, TLI))
@@ -1964,7 +1967,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
bool Changed;
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
- Changed = CleanupPointerRootUsers(GV);
+ Changed = CleanupPointerRootUsers(GV, TLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
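[Note: the pattern in the GlobalOpt hunks above — threading a TargetLibraryInfo pointer into isAllocationFn, getMallocAllocatedType, and friends — recurs throughout this patch. Library-call recognition must consult TLI so that, e.g., -fno-builtin or a freestanding target stops a pass from treating a call named "malloc" as the C allocator. A minimal sketch of the calling convention the helpers now expect; the surrounding pass boilerplate here is hypothetical:

    // Sketch: fetch TLI once, then pass it to every MemoryBuiltins query,
    // mirroring the call sites changed in this patch.
    const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
    if (isAllocationFn(V, TLI)) {
      // V is a call to a recognized allocation function for this target
      // and build mode; without TLI the helper cannot tell whether
      // "malloc" really is the C library allocator here.
    }
]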
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 712888a..69a22fb 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -339,6 +340,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -417,7 +419,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction())) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cbe1ca4..b12fc01 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -168,7 +168,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
- if (isFreeCall(&CI))
+ if (isFreeCall(&CI, TLI))
return visitFree(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
@@ -243,7 +243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
default: break;
case Intrinsic::objectsize: {
uint64_t Size;
- if (getObjectSize(II->getArgOperand(0), Size, TD))
+ if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
return 0;
}
@@ -877,7 +877,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
- if (isAllocLikeFn(CS.getInstruction()))
+ if (isAllocLikeFn(CS.getInstruction(), TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 35a0bbb..2a7182f 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -462,6 +462,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
}
}
+ // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+ Value *X;
+ ConstantInt *C1;
+ if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+ APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+ return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+ }
+ }
+
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
{ const APInt *CI; Value *N;
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
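[Note: a worked instance of the new (x lshr C1) udiv C2 fold, illustrative and not part of the patch. For unsigned values, dividing twice is the same as dividing by the product, so the shift and the udiv collapse into one udiv, provided the scaled divisor C2 << C1 does not overflow the type:

    // floor(floor(x/4) / 5) == floor(x/20) for unsigned x, so
    //   %t = lshr i32 %x, 2 ; %r = udiv i32 %t, 5
    // becomes
    //   %r = udiv i32 %x, 20        ; 5 << 2 == 20
    bool equivalent(unsigned x) {
      return ((x >> 2) / 5u) == (x / 20u);   // true for every unsigned x
    }
]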
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 68ecd51..ff758c4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1068,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0))) {
+ isAllocationFn(BCI->getOperand(0), TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1107,7 +1107,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
static bool
-isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 4> Worklist;
Worklist.push_back(AI);
@@ -1163,7 +1164,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
}
}
- if (isFreeCall(I)) {
+ if (isFreeCall(I, TLI)) {
Users.push_back(I);
continue;
}
@@ -1188,7 +1189,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
SmallVector<WeakVH, 64> Users;
- if (isAllocSiteRemovable(&MI, Users)) {
+ if (isAllocSiteRemovable(&MI, Users, TLI)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
Instruction *I = cast_or_null<Instruction>(&*Users[i]);
if (!I) continue;
@@ -1872,7 +1873,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Instruction *Inst = BBI++;
// DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
@@ -2002,7 +2003,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
if (I == 0) continue; // skip null values.
// Check to see if we can DCE the instruction.
- if (isInstructionTriviallyDead(I)) {
+ if (isInstructionTriviallyDead(I, TLI)) {
DEBUG(errs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
@@ -2102,7 +2103,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// If the instruction was modified, it's possible that it is now dead.
// if so, remove it.
- if (isInstructionTriviallyDead(I)) {
+ if (isInstructionTriviallyDead(I, TLI)) {
EraseInstFromFunction(*I);
} else {
Worklist.Add(I);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 06f4d2f..0775cf4 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "asan"
-#include "FunctionBlackList.h"
+#include "BlackList.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
@@ -217,7 +217,7 @@ struct AddressSanitizer : public ModulePass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Instruction *CtorInsertBefore;
- OwningPtr<FunctionBlackList> BL;
+ OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
@@ -544,6 +544,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G);
+ if (BL->isIn(*G)) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
// Touch only those globals that will not be defined in other modules.
@@ -643,6 +644,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
// Determine whether this global should be poisoned in initialization.
bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
+ // Don't check initialization order if this global is blacklisted.
+ GlobalHasDynamicInitializer &= !BL->isInInit(*G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -736,7 +739,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
TD = getAnalysisIfAvailable<TargetData>();
if (!TD)
return false;
- BL.reset(new FunctionBlackList(ClBlackListFile));
+ BL.reset(new BlackList(ClBlackListFile));
C = &(M.getContext());
LongSize = TD->getPointerSizeInBits();
@@ -774,7 +777,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
/*hasSideEffects=*/true);
llvm::Triple targetTriple(M.getTargetTriple());
- bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI;
+ bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;
MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid :
(LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
new file mode 100644
index 0000000..2cb1199
--- /dev/null
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -0,0 +1,102 @@
+//===-- BlackList.cpp - blacklist for sanitizers --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include <utility>
+#include <string>
+
+#include "BlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+BlackList::BlackList(const StringRef Path) {
+ // Validate and open blacklist file.
+ if (!Path.size()) return;
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ report_fatal_error("Can't open blacklist file: " + Path + ": " +
+ EC.message());
+ }
+
+ // Iterate through each line in the blacklist file.
+ SmallVector<StringRef, 16> Lines;
+ SplitString(File.take()->getBuffer(), Lines, "\n\r");
+ StringMap<std::string> Regexps;
+ for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I) {
+ // Get our prefix and unparsed regexp.
+ std::pair<StringRef, StringRef> SplitLine = I->split(":");
+ StringRef Prefix = SplitLine.first;
+ std::string Regexp = SplitLine.second;
+
+ // Replace * with .*
+ for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
+
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string Error;
+ if (!CheckRE.isValid(Error)) {
+ report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+ ": " + Error);
+ }
+
+ // Add this regexp into the proper group by its prefix.
+ if (Regexps[Prefix].size())
+ Regexps[Prefix] += "|";
+ Regexps[Prefix] += Regexp;
+ }
+
+ // Iterate through each of the prefixes, and create Regexs for them.
+ for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
+ I != E; ++I) {
+ Entries[I->getKey()] = new Regex(I->getValue());
+ }
+}
+
+bool BlackList::isIn(const Function &F) {
+ return isIn(*F.getParent()) || inSection("fun", F.getName());
+}
+
+bool BlackList::isIn(const GlobalVariable &G) {
+ return isIn(*G.getParent()) || inSection("global", G.getName());
+}
+
+bool BlackList::isIn(const Module &M) {
+ return inSection("src", M.getModuleIdentifier());
+}
+
+bool BlackList::isInInit(const GlobalVariable &G) {
+ return isIn(*G.getParent()) || inSection("global-init", G.getName());
+}
+
+bool BlackList::inSection(const StringRef Section,
+ const StringRef Query) {
+ Regex *FunctionRegex = Entries[Section];
+ return FunctionRegex ? FunctionRegex->match(Query) : false;
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
new file mode 100644
index 0000000..73977fc
--- /dev/null
+++ b/lib/Transforms/Instrumentation/BlackList.h
@@ -0,0 +1,55 @@
+//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+// The blacklist disables instrumentation of various functions and global
+// variables. Each line contains a prefix, followed by a wild card expression.
+// ---
+// fun:*_ZN4base6subtle*
+// global:*global_with_bad_access_or_initialization*
+// global-init:*global_with_initialization_issues*
+// src:file_with_tricky_code.cc
+// ---
+// Note that the wild card is in fact an llvm::Regex, but * is automatically
+// replaced with .*
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class Function;
+class GlobalVariable;
+class Module;
+class Regex;
+class StringRef;
+
+class BlackList {
+ public:
+ BlackList(const StringRef Path);
+ // Returns whether either this function or its source file is blacklisted.
+ bool isIn(const Function &F);
+ // Returns whether either this global or its source file is blacklisted.
+ bool isIn(const GlobalVariable &G);
+ // Returns whether this module is blacklisted by filename.
+ bool isIn(const Module &M);
+ // Returns whether a global should be excluded from initialization checking.
+ bool isInInit(const GlobalVariable &G);
+ private:
+ StringMap<Regex*> Entries;
+
+ bool inSection(const StringRef Section, const StringRef Query);
+};
+
+} // namespace llvm
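[Note: for reference, a hedged usage sketch of the new class, following the format documented in the header above; the blacklist file contents are hypothetical, and ClBlackListFile is the cl::opt defined by the sanitizer passes in this patch:

    // Hypothetical blacklist file (format per the header comment):
    //   fun:*_ZN4base6subtle*
    //   src:file_with_tricky_code.cc
    BlackList BL(ClBlackListFile);   // e.g. the -asan-blacklist path
    if (BL.isIn(F))
      return false;                  // skip instrumenting this function
]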
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 09e0f14..6429081 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Instrumentation.h"
using namespace llvm;
@@ -48,10 +49,12 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetData>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
Instruction *Inst;
@@ -166,11 +169,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
bool BoundsChecking::runOnFunction(Function &F) {
TD = &getAnalysis<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
TrapBB = 0;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext());
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 00de882..058f68c 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,8 +1,8 @@
add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
+ BlackList.cpp
BoundsChecking.cpp
EdgeProfiling.cpp
- FunctionBlackList.cpp
GCOVProfiling.cpp
Instrumentation.cpp
OptimalEdgeProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
deleted file mode 100644
index 188ea4d..0000000
--- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-
-#include "FunctionBlackList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Function.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-
-namespace llvm {
-
-FunctionBlackList::FunctionBlackList(const std::string &Path) {
- Functions = NULL;
- const char *kFunPrefix = "fun:";
- if (!Path.size()) return;
- std::string Fun;
-
- OwningPtr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
- report_fatal_error("Can't open blacklist file " + Path + ": " +
- EC.message());
- }
- MemoryBuffer *Buff = File.take();
- const char *Data = Buff->getBufferStart();
- size_t DataLen = Buff->getBufferSize();
- SmallVector<StringRef, 16> Lines;
- SplitString(StringRef(Data, DataLen), Lines, "\n\r");
- for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
- if (Lines[i].startswith(kFunPrefix)) {
- std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- std::string ThisFuncRE;
- // add ThisFunc replacing * with .*
- for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
- if (ThisFunc[j] == '*')
- ThisFuncRE += '.';
- ThisFuncRE += ThisFunc[j];
- }
- // Check that the regexp is valid.
- Regex CheckRE(ThisFuncRE);
- std::string Error;
- if (!CheckRE.isValid(Error))
- report_fatal_error("malformed blacklist regex: " + ThisFunc +
- ": " + Error);
- // Append to the final regexp.
- if (Fun.size())
- Fun += "|";
- Fun += ThisFuncRE;
- }
- }
- if (Fun.size()) {
- Functions = new Regex(Fun);
- }
-}
-
-bool FunctionBlackList::isIn(const Function &F) {
- if (Functions) {
- bool Res = Functions->match(F.getName());
- return Res;
- }
- return false;
-}
-
-} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
deleted file mode 100644
index c1239b9..0000000
--- a/lib/Transforms/Instrumentation/FunctionBlackList.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include <string>
-
-namespace llvm {
-class Function;
-class Regex;
-
-// Blacklisted functions are not instrumented.
-// The blacklist file contains one or more lines like this:
-// ---
-// fun:FunctionWildCard
-// ---
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-class FunctionBlackList {
- public:
- FunctionBlackList(const std::string &Path);
- bool isIn(const Function &F);
- private:
- Regex *Functions;
-};
-
-} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 264a6a6..9fcde31 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -88,11 +88,10 @@ namespace {
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *,
- MDNode *>, 8> &);
+ void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- std::string mangleName(DICompileUnit CU, std::string NewStem);
+ std::string mangleName(DICompileUnit CU, const char *NewStem);
bool EmitNotes;
bool EmitData;
@@ -329,7 +328,7 @@ namespace {
};
}
-std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
MDNode *N = GCov->getOperand(i);
@@ -630,7 +629,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
}
void GCOVProfiler::insertCounterWriteout(
- SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
+ ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy =
FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = Function::Create(WriteoutFTy,
@@ -652,7 +651,7 @@ void GCOVProfiler::insertCounterWriteout(
std::string FilenameGcda = mangleName(compile_unit, "gcda");
Builder.CreateCall(StartFile,
Builder.CreateGlobalStringPtr(FilenameGcda));
- for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
I = CountersBySP.begin(), E = CountersBySP.end();
I != E; ++I) {
DISubprogram SP(I->second);
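[Note: the signature change from SmallVector& to ArrayRef above is an interface cleanup: ArrayRef is a non-owning (pointer, length) view, and a SmallVector converts to it implicitly, so callers need no changes. A minimal sketch; the function name is hypothetical:

    void takeCounters(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CBSP);

    SmallVector<std::pair<GlobalVariable*, MDNode*>, 8> CountersBySP;
    // ... fill CountersBySP ...
    takeCounters(CountersBySP);   // implicit SmallVector -> ArrayRef
]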
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index dc0fa71..17b7775 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,7 +21,7 @@
#define DEBUG_TYPE "tsan"
-#include "FunctionBlackList.h"
+#include "BlackList.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
@@ -50,7 +50,7 @@ static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
-STATISTIC(NumOmittedReadsBeforeWrite,
+STATISTIC(NumOmittedReadsBeforeWrite,
"Number of reads ignored due to following writes");
STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
@@ -77,7 +77,7 @@ struct ThreadSanitizer : public FunctionPass {
int getMemoryAccessFuncIndex(Value *Addr);
TargetData *TD;
- OwningPtr<FunctionBlackList> BL;
+ OwningPtr<BlackList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
@@ -121,7 +121,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<TargetData>();
if (!TD)
return false;
- BL.reset(new FunctionBlackList(ClBlackListFile));
+ BL.reset(new BlackList(ClBlackListFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -186,7 +186,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
NumOmittedReadsFromConstantGlobals++;
return true;
}
- } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
if (isVtableAccess(L)) {
// Reads from a vtable pointer can not race with any writes.
NumOmittedReadsFromVtable++;
@@ -344,7 +344,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
case NotAtomic: assert(false);
case Unordered: // Fall-through.
case Monotonic: v = 1 << 0; break;
- // case Consume: v = 1 << 1; break; // Not specified yet.
+ // case Consume: v = 1 << 1; break; // Not specified yet.
case Acquire: v = 1 << 2; break;
case Release: v = 1 << 3; break;
case AcquireRelease: v = 1 << 4; break;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a8deda8..5912107 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -43,6 +43,7 @@
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PFI = getAnalysisIfAvailable<ProfileInfo>();
OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
- // First pass, eliminate blocks that contain only PHI nodes and an
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
+ if (TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+
+ for (Function::iterator I = F.begin(); I != F.end(); I++) {
+ EverMadeChange |= bypassSlowDivision(F,
+ I,
+ BypassTypeMap);
+ }
+ }
+
+ // Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakVH IterHandle(CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
- RecursivelyDeleteTriviallyDeadInstructions(Repl);
+ RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
if (IterHandle != CurInstIterator) {
// If the iterator instruction was recursively deleted, start over at the
@@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
}
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
- // If we have a SelectInst that will likely profit from branch prediction,
- // turn it into a branch.
- if (DisableSelectToBranch || OptSize || !TLI ||
- !TLI->isPredictableSelectExpensive())
- return false;
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
- if (!SI->getCondition()->getType()->isIntegerTy(1) ||
- !isFormingBranchFromSelectProfitable(SI))
+ // Can we convert the 'select' into control flow?
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (VectorCond)
+ SelectKind = TargetLowering::VectorMaskSelect;
+ else if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+ // Do we have efficient codegen support for this kind of select?
+ if (TLI->isSelectSupported(SelectKind)) {
+ // We have efficient codegen support for the select instruction.
+ // Check if it is profitable to keep this 'select'.
+ if (!TLI->isPredictableSelectExpensive() ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+ }
+
ModifiedDT = true;
// First, we split the block containing the select into 2 blocks.
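[Note: the new bypassSlowDivision call in runOnFunction above rewrites wide divisions so a cheap narrow divide is used when both operands happen to fit, per TLI->getBypassSlowDivTypes(). Conceptual shape of the bypassed code, as a sketch rather than the actual IR emission in BypassSlowDivision.cpp, assuming the target maps i64 -> i32:

    uint64_t div64(uint64_t a, uint64_t b) {
      if (((a | b) >> 32) == 0)              // both operands fit in 32 bits
        return uint32_t(a) / uint32_t(b);    // fast narrow divide
      return a / b;                          // slow wide divide
    }
]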
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8dbcc23..086f0a1 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -22,6 +22,7 @@
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -38,10 +39,11 @@ namespace {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnBasicBlock(BasicBlock &BB) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = DI++;
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
++DIEEliminated;
@@ -87,6 +89,8 @@ char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
@@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) {
Instruction *I = WorkList.back();
WorkList.pop_back();
- if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
// Loop over all of the values that the instruction uses, if there are
// instructions being used, add them to the worklist, because they might
// go dead after this one is removed.
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8b1283f..1ff4329 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
SmallSetVector<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getTargetData()))
+ if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo()))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst)) {
+ if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
MadeChange |= HandleFree(F);
continue;
}
@@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI, *MD);
+ DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
+ DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
++NumFastStores;
MadeChange = true;
@@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD);
+ DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
++NumFastStores;
MadeChange = true;
@@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+ else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
+ !PointerMayBeCaptured(I, true, true))
DeadStackObjects.insert(I);
}
@@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
+ &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI)) {
+ if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
+ &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (CallSite CS = cast<Value>(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI))
+ if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
DeadStackObjects.remove(BBI);
// If this call does not access memory, it can't be loading any of our
@@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
LiveAllocas.push_back(*I);
}
- for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
- E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.remove(*I);
-
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
- if (DeadStackObjects.empty())
+ if (DeadStackObjects.size() == LiveAllocas.size())
break;
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.remove(*I);
+
continue;
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9759549..2627113 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Instruction *Inst = I++;
// Dead instructions should just be removed.
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
Inst->eraseFromParent();
Changed = true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 4822fd0..16ae6ad 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
-uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) {
e = nextValueNumber++;
valueNumbering[C] = e;
@@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
- case Instruction::Or :
+ case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
@@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+#ifndef NDEBUG
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
+#endif
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+ ++SwitchEdges[SI->getSuccessor(i)];
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- BasicBlockEdge E(Parent, Dst);
- if (E.isSingleEdge())
+ // If there is only a single edge, propagate the case value into it.
+ if (SwitchEdges.lookup(Dst) == 1) {
+ BasicBlockEdge E(Parent, Dst);
Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ }
}
return Changed;
}
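[Note: the edge counting above matters because propagateEquality is only sound along an edge taken for exactly one case value. An illustrative example, hypothetical code not from the patch:

    int pick(int c) {
      switch (c) {
      case 0:          // unique edge: GVN may replace c with 0 in this block
        return c + 1;  // foldable to 1
      case 1:
      case 2:          // two case edges reach this block: c is 1 or 2,
        return c * 2;  // so no single constant can be propagated
      }
      return 0;
    }
]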
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 37f8bdf..c933a17 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +69,7 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
@@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// new comparison.
NewCompare->takeName(Compare);
Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
// Delete the old floating point increment.
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
+ RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
// If the FP induction variable still has uses, this is because something else
// in the loop uses its value. In order to canonicalize the induction
@@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
@@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->setIncomingValue(i, ExitVal);
// If this instruction is dead now, delete it.
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
if (NumPreds == 1) {
// Completely replace a single-pred PHI. This is safe, because the
// NewVal won't be variant in the loop, so we don't need an LCSSA phi
// node anymore.
PN->replaceAllUsesWith(ExitVal);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
}
if (NumPreds != 1) {
@@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
Changed = false;
@@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
// The Rewriter may not be used from this point on.
@@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SinkUnusedInvariants(L);
// Clean up dead instructions.
- Changed |= DeleteDeadPHIs(L->getHeader());
+ Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
// Check a post-condition.
assert(L->isLCSSAForm(*DT) &&
"Indvars did not leave the loop in lcssa form!");
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dd42c59..20844c6 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD);
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
// Threaded an edge!
++NumThreads;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0192e92..99bedce 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,9 @@ namespace {
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ bool MayThrow; // The current loop contains an instruction which
+ // may throw, thus preventing code motion of
+ // instructions with side effects.
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST->add(*BB); // Incorporate the specified basic block
}
+ MayThrow = false;
+ // TODO: We've already searched for instructions which may throw in subloops.
+ // We may want to reuse this information.
+ for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+ (BB != BBE) && !MayThrow; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !MayThrow; ++I)
+ MayThrow |= I->mayThrow();
+
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
// their loop, into this loop, so there is no need to process the BODIES of
@@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If the instruction is dead, we would try to sink it because it isn't used
// in the loop, instead, just delete it.
- if (isInstructionTriviallyDead(&I)) {
+ if (isInstructionTriviallyDead(&I, TLI)) {
DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
++II;
CurAST->deleteValue(&I);
@@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
if (!FoundMod) return true;
}
- // FIXME: This should use mod/ref information to see if we can hoist or sink
- // the call.
+ // FIXME: This should use mod/ref information to see if we can hoist or
+ // sink the call.
return false;
}
- // Otherwise these instructions are hoistable/sinkable
- return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
- isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
- isa<ShuffleVectorInst>(I);
+ // Only these instructions are hoistable/sinkable.
+ bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+ isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I));
+ if (!HoistableKind)
+ return false;
+
+ return isSafeToExecuteUnconditionally(I);
}
/// isNotUsedInLoop - Return true if the only users of this instruction are
@@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
}
bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (MayThrow)
+ return false;
+
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
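[Note: the new MayThrow flag makes isGuaranteedToExecute conservative: if any instruction in the loop may throw, the body can terminate early, so even an instruction that dominates all exit blocks is not guaranteed to run. An illustrative sketch; may_throw and g are hypothetical:

    void f(int n) {
      for (int i = 0; i != n; ++i) {
        may_throw();   // if this throws on iteration 0 ...
        g = 42;        // ... the store never runs, so LICM must not hoist
      }                // or sink it as if it executed on every trip
    }
]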
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ac1082c..a72e288 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
if (Instruction *I = dyn_cast<Instruction>(V))
- if (isInstructionTriviallyDead(I))
- deleteDeadInstruction(I, SE);
+ if (isInstructionTriviallyDead(I, TLI))
+ deleteDeadInstruction(I, SE, TLI);
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(BasePtr, *SE);
+ deleteIfDeadInstruction(BasePtr, *SE, TLI);
return false;
}
@@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, *SE);
+ deleteDeadInstruction(TheStore, *SE, TLI);
++NumMemSet;
return true;
}
@@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(LoadBasePtr, *SE);
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, *SE);
+ deleteDeadInstruction(SI, *SE, TLI);
++NumMemCpy;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 982400c..f5daa7b 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
if (IsSubloopHeader && !isa<PHINode>(I))
break;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7eeb152..abe07aa 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
return false;
BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
@@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
- // Keep it simple, and restrict loop rotation to loops with one exit only.
- // In future, lift this restriction and support for multiple exits if
- // required.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() > 1)
+  // If the loop latch already contains a branch that leaves the loop, then
+  // the loop is already rotated.
+ if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
// Check size of original header and reject loop if it is very big.
@@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
- BasicBlock *OrigLatch = L->getLoopLatch();
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0 || OrigLatch == 0)
+ if (OrigPreheader == 0)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
@@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
SE->forgetLoop(L);
+ DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
@@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- // Since OrigPreheader now has the conditional branch to Exit block, it is
- // the dominator of Exit.
- DT->changeImmediateDominator(Exit, OrigPreheader);
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ // Everything that was dominated by the old loop header is now dominated
+ // by the original loop preheader. Conceptually the header was merged
+ // into the preheader, even though we reuse the actual block as a new
+ // loop latch.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(OrigHeader, OrigLatch);
@@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(NewHeader, OrigPreheader);
DT->changeImmediateDominator(OrigHeader, OrigLatch);
+
+      // Brute force incremental dominator tree update: for each child of the
+      // original header, recompute its immediate dominator as the nearest
+      // common dominator of all of its CFG predecessors.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ bool Changed;
+ do {
+ Changed = false;
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+ DomTreeNode *Node = HeaderChildren[I];
+ BasicBlock *BB = Node->getBlock();
+
+ pred_iterator PI = pred_begin(BB);
+ BasicBlock *NearestDom = *PI;
+ for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+
+ // Remember if this changes the DomTree.
+ if (Node->getIDom()->getBlock() != NearestDom) {
+ DT->changeImmediateDominator(BB, NearestDom);
+ Changed = true;
+ }
+ }
+
+        // If the dominator changed, this may affect other predecessors;
+        // continue until we reach a fixed point.
+ } while (Changed);
}
}
@@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
+ DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
++NumRotated;
return true;
}
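For intuition about the relaxed guard in this rotation rewrite (my illustration, not from the patch): rotation turns a header-tested loop into a latch-tested one, so a loop whose latch already exits needs no further work, regardless of how many exits it has.

    void body(int);

    void beforeRotation(int i, int n) {
      while (i < n) { body(i); ++i; }       // header exits, latch does not
    }

    void afterRotation(int i, int n) {
      if (i < n)                            // old header test, hoisted guard
        do { body(i); ++i; } while (i < n); // latch now does the exiting
    }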
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0ae7a51..d7495da 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
+#ifndef NDEBUG
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const {
}
}
+#ifndef NDEBUG
void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
+#ifndef NDEBUG
void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
+#ifndef NDEBUG
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
+#ifndef NDEBUG
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
@@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
+#ifndef NDEBUG
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
@@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
+#ifndef NDEBUG
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
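The #ifndef NDEBUG guards added above compile the various dump() helpers only into asserts-enabled builds. A side note (mine, not stated in the patch): call sites must be conditional in the same way, or release builds fail to link. A fragment, assuming an LSRUse object LU in scope:

    #ifndef NDEBUG
      LU.dump();        // OK: only compiled when asserts are enabled
    #endif
      DEBUG(LU.dump()); // Also OK: DEBUG() expands to nothing under NDEBUG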
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 3222f20..dce8e8b 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
// An ObjC-Identified object can't alias a load if it is never locally stored.
if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
if (BIsIdentified) {
- // If both pointers have provenance, they can be directly compared.
- if (A != B)
- return false;
- } else {
- if (isa<LoadInst>(B))
- return isStoredObjCPointer(A);
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+      // Both pointers are identified and neither obviously escapes.
+ return false;
}
- } else {
- if (BIsIdentified && isa<LoadInst>(A))
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
return isStoredObjCPointer(B);
}
@@ -1381,9 +1384,6 @@ namespace {
/// PtrState - This class summarizes several per-pointer runtime properties
/// which are propagated through the flow graph.
class PtrState {
- /// NestCount - The known minimum level of retain+release nesting.
- unsigned NestCount;
-
/// KnownPositiveRefCount - True if the reference count is known to
/// be incremented.
bool KnownPositiveRefCount;
@@ -1401,7 +1401,7 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
Seq(S_None) {}
void SetKnownPositiveRefCount() {
@@ -1416,18 +1416,6 @@ namespace {
return KnownPositiveRefCount;
}
- void IncrementNestCount() {
- if (NestCount != UINT_MAX) ++NestCount;
- }
-
- void DecrementNestCount() {
- if (NestCount != 0) --NestCount;
- }
-
- bool IsKnownNested() const {
- return NestCount > 0;
- }
-
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
@@ -1454,7 +1442,6 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
- NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer, including tests that use AliasAnalysis.
+static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialUse(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+  // Pointers in constant memory do not point to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
/// CanAlterRefCount - Test whether the given instruction can result in a
/// reference count modification (positive or negative) for the pointer's
/// object.
@@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
// Comparing a pointer with null, or any other constant, isn't really a use,
// because we don't care what the pointer points to, or about the values
// of any other dynamic reference-counted pointers.
- if (!IsPotentialUse(ICI->getOperand(1)))
+ if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
return false;
} else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
break;
}
case IC_RetainBlock:
@@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.SetKnownPositiveRefCount();
- S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Stop:
@@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Merge the states from each successor to compute the initial state
// for the current block.
- for (BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end()); SI != SE; ++SI) {
+ BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end());
+ if (SI != SE) {
const BasicBlock *Succ = *SI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
assert(I != BBStates.end());
@@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergeSucc(I->second);
}
- break;
}
// Visit all the instructions, bottom-up.
@@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
S.ResetSequenceProgress(S_Retain);
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.Calls.insert(Inst);
}
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
// A retain can be a potential use; proceed to the generic checking
// code below.
@@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementNestCount();
+ S.ClearRefCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Merge the states from each predecessor to compute the initial state
// for the current block.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
+ BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end());
+ if (PI != PE) {
const BasicBlock *Pred = *PI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
assert(I != BBStates.end());
@@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergePred(I->second);
}
- break;
}
// Visit all the instructions, top-down.
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index d13e4ab..6d27db1 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// changeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
}
}
-/// ChangeToCall - Convert the specified invoke into a normal call.
-static void ChangeToCall(InvokeInst *II) {
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
@@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) {
II->eraseFromParent();
}
-static bool MarkAliveBlocks(BasicBlock *BB,
+static bool markAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
@@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
++BBI;
if (!isa<UnreachableInst>(BBI)) {
// Don't insert a call to llvm.trap right before the unreachable.
- ChangeToUnreachable(BBI, false);
+ changeToUnreachable(BBI, false);
Changed = true;
}
break;
@@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
- ChangeToUnreachable(SI, true);
+ changeToUnreachable(SI, true);
Changed = true;
break;
}
@@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Value *Callee = II->getCalledValue();
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- ChangeToUnreachable(II, true);
+ changeToUnreachable(II, true);
Changed = true;
} else if (II->doesNotThrow()) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
II->getUnwindDest()->removePredecessor(II->getParent());
II->eraseFromParent();
} else
- ChangeToCall(II);
+ changeToCall(II);
Changed = true;
}
}
@@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-static bool RemoveUnreachableBlocksFromFn(Function &F) {
+static bool removeUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
return true;
}
-/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
-static bool MergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
BasicBlock *RetBlock = 0;
@@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) {
return Changed;
}
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
- bool EverChanged = RemoveUnreachableBlocksFromFn(F);
- EverChanged |= MergeEmptyReturnBlocks(F);
- EverChanged |= IterativeSimplifyCFG(F, TD);
+ bool EverChanged = removeUnreachableBlocksFromFn(F);
+ EverChanged |= mergeEmptyReturnBlocks(F);
+ EverChanged |= iterativelySimplifyCFG(F, TD);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
- // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
- // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
+ // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
+ // avoid rerunning iterativelySimplifyCFG if the second pass of
- // RemoveUnreachableBlocksFromFn doesn't do anything.
- if (!RemoveUnreachableBlocksFromFn(F))
+ // avoid reruning iterativelySimplifyCFG if the second pass of
+ // removeUnreachableBlocksFromFn doesn't do anything.
+ if (!removeUnreachableBlocksFromFn(F))
return true;
do {
- EverChanged = IterativeSimplifyCFG(F, TD);
- EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ EverChanged = iterativelySimplifyCFG(F, TD);
+ EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);
return true;
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3904419..65311fe 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -38,6 +39,10 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of library calls simplified");
STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
//===----------------------------------------------------------------------===//
// Optimizer Base Class
//===----------------------------------------------------------------------===//
@@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
-// 'cos*' Optimizations
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ if (CheckRetType) {
+      // Check that all uses of the call, e.g. of 'sin', are converted to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
+//===---------------------------------------===//
+// 'cos*' Optimizations
struct CosOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
// cos(-x) -> cos(x)
Value *Op1 = CI->getArgOperand(0);
@@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization {
BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
}
- return 0;
+ return Ret;
}
};
@@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization {
struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
@@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization {
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return 0;
+ if (Op2C == 0) return Ret;
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
@@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization {
struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+        TLI->has(LibFunc::exp2f)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
@@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization {
return CI;
}
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return 0;
-
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
-
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+ return Ret;
}
};
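Taken together, the relocated UnaryDoubleFPOpt and the new CheckRetType flag give two flavors of double-to-float shrinking. A source-level illustration (my own, with made-up variables):

    float f = 0.5f;

    // Always-safe shrink: the argument is provably a widened float and
    // floor's result is exactly representable as float.
    double a = floor((double)f);     // -> (double)floorf(f)

    // Unsafe shrink, only with -enable-double-float-shrink: fires when every
    // use of the call is immediately truncated back to float (CheckRetType),
    // accepting that sinf may round differently than sin.
    float b = (float)sin((double)f); // -> sinf(f)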
@@ -1534,7 +1571,8 @@ namespace {
StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
+ UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
@@ -1547,10 +1585,13 @@ namespace {
public:
static char ID; // Pass identification
SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
- StpCpy(false), StpCpyChk(true) {
+ StpCpy(false), StpCpyChk(true),
+ UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
Optimizations[TLI->getName(F)] = Opt;
}
+void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
+ LibCallOptimization* Opt) {
+ if (TLI->has(F1) && TLI->has(F2))
+ Optimizations[TLI->getName(F1)] = Opt;
+}
+
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
@@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
- if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
- Optimizations["fabs"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
- Optimizations["floor"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
- Optimizations["ceil"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
- Optimizations["round"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
- Optimizations["rint"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
- Optimizations["nearbyint"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
- Optimizations["trunc"] = &UnaryDoubleFP;
+ AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
+ AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
+ AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
+ AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
+ AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
+
+  if (UnsafeFPShrink) {
+ AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+ }
// Integer Optimizations
Optimizations["ffs"] = &FFS;
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index d831452..1e6586b 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const {
OS << ']';
}
+#ifndef NDEBUG
void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2679b93..75a7817 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
/// is dead. Also recursively delete any operands that become dead as
/// a result. This includes tracing the def-use list from the PHI to see if
/// it is ultimately unused or if it reaches an unused cycle.
-bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
SmallVector<WeakVH, 8> PHIs;
@@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
- Changed |= RecursivelyDeleteDeadPHINode(PN);
+ Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
return Changed;
}
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 0000000..30d60be
--- /dev/null
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,253 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom, 32-bit divides are slow enough that at run time
+// it is profitable to check the values of the operands and, if both are
+// positive and less than 256, to use an unsigned 8-bit divide instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
+using namespace llvm;
+
+namespace {
+ struct DivOpInfo {
+ bool SignedOp;
+ Value *Dividend;
+ Value *Divisor;
+
+ DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+ : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+ };
+
+ struct DivPhiNodes {
+ PHINode *Quotient;
+ PHINode *Remainder;
+
+ DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<DivOpInfo> {
+ static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+ return Val1.SignedOp == Val2.SignedOp &&
+ Val1.Dividend == Val2.Dividend &&
+ Val1.Divisor == Val2.Divisor;
+ }
+
+ static DivOpInfo getEmptyKey() {
+ return DivOpInfo(false, 0, 0);
+ }
+
+ static DivOpInfo getTombstoneKey() {
+ return DivOpInfo(true, 0, 0);
+ }
+
+ static unsigned getHashValue(const DivOpInfo &Val) {
+ return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+ reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ (unsigned)Val.SignedOp;
+ }
+ };
+
+ typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
+
+// insertFastDiv - Substitutes the div/rem instruction with code that checks
+// the values of the operands and uses a shorter, faster div/rem instruction
+// when possible, falling back to the longer, slower one otherwise.
+static bool insertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ Value *Dividend = Instr->getOperand(0);
+ Value *Divisor = Instr->getOperand(1);
+
+  if (isa<ConstantInt>(Divisor)) {
+    // A divide or remainder by a constant should already have been folded
+    // or strength-reduced at compile time; do not bypass it here.
+    return false;
+  }
+
+ // Basic Block is split before divide
+ BasicBlock *MainBB = I;
+ BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+  ++I; // Advance iterator I to SuccessorBB.
+
+ // Add new basic block for slow divide operation
+ BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ SlowBB->moveBefore(SuccessorBB);
+ IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+ Value *SlowQuotientV;
+ Value *SlowRemainderV;
+ if (UseSignedOp) {
+ SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ } else {
+ SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+ SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ }
+ SlowBuilder.CreateBr(SuccessorBB);
+
+ // Add new basic block for fast divide operation
+ BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ FastBB->moveBefore(SlowBB);
+ IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+ Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
+ BypassType);
+ Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
+ BypassType);
+
+  // Use udiv/urem since the bypass only fires for non-negative operands.
+  // A plain udiv is required here: an "exact" udiv has undefined behavior
+  // whenever the division leaves a remainder.
+  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV,
+                                                 ShortDivisorV);
+ Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+ ShortDivisorV);
+ Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortQuotientV,
+ Dividend->getType());
+ Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+ ShortRemainderV,
+ Dividend->getType());
+ FastBuilder.CreateBr(SuccessorBB);
+
+ // Phi nodes for result of div and rem
+ IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+ PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+ QuoPhi->addIncoming(FastQuotientV, FastBB);
+ PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+ RemPhi->addIncoming(SlowRemainderV, SlowBB);
+ RemPhi->addIncoming(FastRemainderV, FastBB);
+
+ // Replace Instr with appropriate phi node
+ if (UseDivOp)
+ Instr->replaceAllUsesWith(QuoPhi);
+ else
+ Instr->replaceAllUsesWith(RemPhi);
+ Instr->eraseFromParent();
+
+ // Combine operands into a single value with OR for value testing below
+ MainBB->getInstList().back().eraseFromParent();
+ IRBuilder<> MainBuilder(MainBB, MainBB->end());
+ Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+  // The mask is inverted so the AND below tests whether either operand has
+  // bits set above the bypass type's width.
+ uint64_t BitMask = ~BypassType->getBitMask();
+ Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+  // Compare operand values and branch. The zero constant must match AndV's
+  // width, or the icmp would be malformed for operands wider than 32 bits.
+  Value *ZeroV = Constant::getNullValue(AndV->getType());
+  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+ MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+  // Point iterator J at the first instruction of SuccessorBB.
+ J = I->begin();
+
+ // Cache phi nodes to be used later in place of other instances
+ // of div or rem with the same sign, dividend, and divisor
+ DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+ DivPhiNodes Value(QuoPhi, RemPhi);
+ PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
+ return true;
+}
+
+// reuseOrInsertFastDiv - Reuses a previously computed quotient or remainder if
+// the operands and operation are identical. Otherwise calls insertFastDiv to
+// perform the optimization and caches the resulting quotient and remainder.
+static bool reuseOrInsertFastDiv(Function &F,
+ Function::iterator &I,
+ BasicBlock::iterator &J,
+ IntegerType *BypassType,
+ bool UseDivOp,
+ bool UseSignedOp,
+ DivCacheTy &PerBBDivCache) {
+ // Get instruction operands
+ Instruction *Instr = J;
+ DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+ DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
+
+ if (CacheI == PerBBDivCache.end()) {
+ // If previous instance does not exist, insert fast div
+ return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
+ PerBBDivCache);
+ }
+
+ // Replace operation value with previously generated phi node
+ DivPhiNodes &Value = CacheI->second;
+ if (UseDivOp) {
+ // Replace all uses of div instruction with quotient phi node
+ J->replaceAllUsesWith(Value.Quotient);
+ } else {
+ // Replace all uses of rem instruction with remainder phi node
+ J->replaceAllUsesWith(Value.Remainder);
+ }
+
+ // Advance to next operation
+ ++J;
+
+ // Remove redundant operation
+ Instr->eraseFromParent();
+ return true;
+}
+
+// bypassSlowDivision - This optimization identifies div/rem instructions that
+// can be profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(Function &F,
+ Function::iterator &I,
+ const DenseMap<Type *, Type *> &BypassTypeMap) {
+ DivCacheTy DivCache;
+
+ bool MadeChange = false;
+  for (BasicBlock::iterator J = I->begin(); J != I->end(); ++J) {
+
+ // Get instruction details
+ unsigned Opcode = J->getOpcode();
+ bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
+ bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
+ bool UseSignedOp = Opcode == Instruction::SDiv ||
+ Opcode == Instruction::SRem;
+
+ // Only optimize div or rem ops
+ if (!UseDivOp && !UseRemOp)
+ continue;
+
+ // Continue if div/rem type is not bypassed
+ DenseMap<Type *, Type *>::const_iterator BT =
+ BypassTypeMap.find(J->getType());
+ if (BT == BypassTypeMap.end())
+ continue;
+
+ IntegerType *BypassType = cast<IntegerType>(BT->second);
+ MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
+ UseSignedOp, DivCache);
+ }
+
+ return MadeChange;
+}
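A sketch of how a client might drive the new utility (hypothetical wiring; per the diffstat the real caller is CodeGenPrepare, which is outside this hunk). It assumes a Function &F in scope and a target where 32-bit divides are worth bypassing with 8-bit ones, as in the Atom example from the file header:

    DenseMap<Type *, Type *> BypassTypes;
    LLVMContext &Ctx = F.getContext();
    BypassTypes[Type::getInt32Ty(Ctx)] = Type::getInt8Ty(Ctx);

    bool Changed = false;
    for (Function::iterator I = F.begin(); I != F.end(); ++I)
      // Note: bypassSlowDivision may advance I itself when it splits a block.
      Changed |= bypassSlowDivision(F, I, BypassTypes);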
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 4ff31ca..215a16f 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
+ BypassSlowDivision.cpp
CloneFunction.cpp
CloneModule.cpp
CmpInstAnalysis.cpp
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bed7d72..0601433 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -52,7 +52,8 @@ using namespace llvm;
/// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
/// conditions and indirectbr addresses this might make dead if
/// DeleteDeadConditions is true.
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
+bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
+ const TargetLibraryInfo *TLI) {
TerminatorInst *T = BB->getTerminator();
IRBuilder<> Builder(T);
@@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Cond = BI->getCondition();
BI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
return false;
@@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Cond = SI->getCondition();
SI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
return true;
}
@@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
Value *Address = IBI->getAddress();
IBI->eraseFromParent();
if (DeleteDeadConditions)
- RecursivelyDeleteTriviallyDeadInstructions(Address);
+ RecursivelyDeleteTriviallyDeadInstructions(Address, TLI);
// If we didn't find our destination in the IBI successor list, then we
// have undefined behavior. Replace the unconditional branch with an
@@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
/// isInstructionTriviallyDead - Return true if the result produced by the
/// instruction is not used, and the instruction has no side effects.
///
-bool llvm::isInstructionTriviallyDead(Instruction *I) {
+bool llvm::isInstructionTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
// We don't want the landingpad instruction removed by anything this general.
@@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
return isa<UndefValue>(II->getArgOperand(1));
}
- if (isAllocLikeFn(I)) return true;
+ if (isAllocLikeFn(I, TLI)) return true;
- if (CallInst *CI = isFreeCall(I))
+ if (CallInst *CI = isFreeCall(I, TLI))
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
return C->isNullValue() || isa<UndefValue>(C);
@@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
/// trivially dead instruction, delete it. If that makes any of its operands
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
-bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
+bool
+llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+ const TargetLibraryInfo *TLI) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !I->use_empty() || !isInstructionTriviallyDead(I))
+ if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
@@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
// operand, and if it is 'trivially' dead, delete it in a future loop
// iteration.
if (Instruction *OpI = dyn_cast<Instruction>(OpV))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
@@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) {
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
/// too, recursively. Return true if a change was made.
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
+ const TargetLibraryInfo *TLI) {
SmallPtrSet<Instruction*, 4> Visited;
for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
I = cast<Instruction>(*I->use_begin())) {
if (I->use_empty())
- return RecursivelyDeleteTriviallyDeadInstructions(I);
+ return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
// If we find an instruction more than once, we're on a cycle that
// won't prove fruitful.
if (!Visited.insert(I)) {
// Break the cycle and delete the instruction and its operands.
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- (void)RecursivelyDeleteTriviallyDeadInstructions(I);
+ (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
return true;
}
}
@@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
///
/// This returns true if it changed the code; note that it can delete
/// instructions in other blocks as well as in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeChange = false;
#ifndef NDEBUG
@@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
continue;
}
- MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
if (BIHandle != BI)
BI = BB->begin();
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 518df7c..32d7fa1 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -22,6 +22,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -101,14 +103,14 @@ public:
///
static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
if (SI1 == SI2) return false; // Can't merge with self!
-
+
// It is not safe to merge these two switch instructions if they have a common
// successor, and if that successor has a PHI node, and if *that* PHI node has
// conflicting incoming values from the two switch blocks.
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
-
+
for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
if (SI1Succs.count(*I))
for (BasicBlock::iterator BBI = (*I)->begin();
@@ -118,7 +120,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
PN->getIncomingValueForBlock(SI2BB))
return false;
}
-
+
return true;
}
@@ -135,7 +137,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
assert(SI1->isUnconditional() && SI2->isConditional());
// We fold the unconditional branch if we can easily update all PHI nodes in
- // common successors:
+ // common successors:
// 1> We have a constant incoming value for the conditional branch;
// 2> We have "Cond" as the incoming value for the unconditional branch;
// 3> SI2->getCondition() and Cond have same operands.
@@ -170,7 +172,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
-
+
PHINode *PN;
for (BasicBlock::iterator I = Succ->begin();
(PN = dyn_cast<PHINode>(I)); ++I)
@@ -222,7 +224,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// doesn't dominate BB.
if (Pred2->getSinglePredecessor() == 0)
return 0;
-
+
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
if (Pred1Br->getSuccessor(0) == BB &&
@@ -252,7 +254,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// Otherwise, if this is a conditional branch, then we can use it!
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
if (BI == 0) return 0;
-
+
assert(BI->isConditional() && "Two successors but not conditional?");
if (BI->getSuccessor(0) == Pred1) {
IfTrue = Pred1;
@@ -345,7 +347,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
if (AggressiveInsts == 0) return false;
-
+
// If we have seen this instruction before, don't count it again.
if (AggressiveInsts->count(I)) return true;
@@ -411,7 +413,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
-
+
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
@@ -420,21 +422,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Vals.push_back(C);
return I->getOperand(0);
}
-
+
// If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
// the set.
ConstantRange Span =
ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
-
+
// If this is an and/!= check then we want to optimize "x ugt 2" into
// x != 0 && x != 1.
if (!isEQ)
Span = Span.inverse();
-
+
// If there are a ton of values, we don't want to make a ginormous switch.
if (Span.getSetSize().ugt(8) || Span.isEmptySet())
return 0;
-
+
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
@@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
}
return 0;
}
-
+
// Otherwise, we can only handle an | or &, depending on isEQ.
if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
return 0;
-
+
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
@@ -467,12 +469,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Extra = I->getOperand(1);
return LHS;
}
-
+
Vals.resize(NumValsBeforeLHS);
UsedICmps = UsedICmpsBeforeLHS;
return 0;
}
-
+
// If the LHS can't be folded in, but Extra is available and RHS can, try to
// use LHS as Extra.
if (Extra == 0 || Extra == I->getOperand(0)) {
@@ -484,7 +486,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
assert(Vals.size() == NumValsBeforeLHS);
Extra = OldExtra;
}
-
+
return 0;
}
@@ -615,6 +617,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
assert(ThisVal && "This isn't a value comparison!!");
if (ThisVal != PredVal) return false; // Different predicates.
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Find out information about when control will move from Pred to TI's block.
std::vector<ValueEqualityComparisonCase> PredCases;
BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
@@ -634,7 +639,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// can simplify TI.
if (!ValuesOverlap(PredCases, ThisCases))
return false;
-
+
if (isa<BranchInst>(TI)) {
// Okay, one of the successors of this condbr is dead. Convert it to a
// uncond br.
@@ -652,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
EraseTerminatorInstAndDCECond(TI);
return true;
}
-
+
SwitchInst *SI = cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
SmallPtrSet<Constant*, 16> DeadCases;
@@ -673,7 +678,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
-
+
// Otherwise, TI's block must correspond to some matched value. Find out
// which value (or set of values) this is.
ConstantInt *TIV = 0;
@@ -729,8 +734,8 @@ namespace {
}
static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt**)P1;
- const ConstantInt *RHS = *(const ConstantInt**)P2;
+ const ConstantInt *LHS = *(const ConstantInt*const*)P1;
+ const ConstantInt *RHS = *(const ConstantInt*const*)P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -738,6 +743,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) {
return -1;
}
+static inline bool HasBranchWeights(const Instruction* I) {
+ MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0))
+ if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
+ return MDS->getString().equals("branch_weights");
+
+ return false;
+}
+
+/// Tries to get the branch weight at position Pos (starting at 0) for the
+/// given instruction; returns null if no weight is available.
+static ConstantInt* GetWeight(Instruction* I, int Pos) {
+ MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ if (ProfMD && ProfMD->getOperand(0)) {
+ if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) {
+ if (MDS->getString().equals("branch_weights")) {
+ assert(ProfMD->getNumOperands() >= 3);
+ return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos));
+ }
+ }
+ }
+
+ return 0;
+}
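Both helpers read the !prof "branch_weights" metadata. For reference, the IR shape they expect, and how such metadata is produced with MDBuilder, which this file now includes (the weights 64 and 4 are my example values):

    // Textual IR: operand 0 is the tag, operands 1..N are the weights.
    //   br i1 %cmp, label %then, label %else, !prof !0
    //   !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}

    // Programmatically, for a conditional branch BI:
    MDBuilder MDB(BI->getContext());
    BI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(64, 4));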
+
+/// Scale the given weights based on the successor TI's metadata. Scaling is
+/// done by multiplying every weight by the sum of the successor's weights.
+static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) {
+ // Sum the successor's weights
+ assert(HasBranchWeights(STI));
+ unsigned Scale = 0;
+ MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof);
+ for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i));
+ assert(CI);
+ Scale += CI->getValue().getZExtValue();
+ }
+
+ // Skip default, as it's replaced during the folding
+ for (unsigned i = 1; i < Weights.size(); ++i) {
+ Weights[i] *= Scale;
+ }
+}
+
+/// Checks whether any of the weights is too big to fit in a uint32_t, and if
+/// so halves all of the weights.
+static void FitWeights(MutableArrayRef<uint64_t> Weights) {
+ bool Halve = false;
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ if (Weights[i] > UINT_MAX) {
+ Halve = true;
+ break;
+ }
+
+  if (!Halve)
+ return;
+
+ for (unsigned i = 0; i < Weights.size(); ++i)
+ Weights[i] /= 2;
+}
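A worked example of the scaling (the numbers are mine): suppose the predecessor has weights {default: 8, case A: 4} and the successor being folded in has {default: 3, case B: 1}. Then:

    Scale          = 3 + 1 = 4    (sum of the successor's weights)
    case A         = 4 * 4 = 16   (ScaleWeights skips Weights[0])
    folded default = 8 * 3 = 24   (Weights.front() *= SuccDefaultWeight)
    new case B     = 8 * 1 = 8    (PredDefaultWeight * GetWeight(TI, i))

The ratios between entries are preserved, and FitWeights halves everything only if some entry no longer fits in a uint32_t.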
+
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
/// equality comparison instruction (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
@@ -770,6 +836,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// build.
SmallVector<BasicBlock*, 8> NewSuccessors;
+ // Update the branch weight metadata along the way
+ SmallVector<uint64_t, 8> Weights;
+ uint64_t PredDefaultWeight = 0;
+ bool PredHasWeights = HasBranchWeights(PTI);
+ bool SuccHasWeights = HasBranchWeights(TI);
+
+ if (PredHasWeights) {
+ MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof);
+ assert(MD);
+ for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
+ ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
+ assert(CI);
+ Weights.push_back(CI->getValue().getZExtValue());
+ }
+
+ // If the predecessor is a conditional eq, then swap the default weight
+ // to be the first entry.
+ if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) {
+ assert(Weights.size() == 2);
+ ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+ std::swap(Weights.front(), Weights.back());
+ }
+ }
+
+ PredDefaultWeight = Weights.front();
+ } else if (SuccHasWeights) {
+    // If there are no predecessor weights but there are successor weights,
+    // populate Weights with 1, which will later be scaled to the sum of the
+    // successor's weights.
+ Weights.assign(1 + PredCases.size(), 1);
+ PredDefaultWeight = 1;
+ }
+
+ uint64_t SuccDefaultWeight = 0;
+ if (SuccHasWeights) {
+ int Index = 0;
+ if (BranchInst* BI = dyn_cast<BranchInst>(TI)) {
+ ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ assert(ICI);
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ Index = 1;
+ }
+
+ SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue();
+ }
+
if (PredDefault == BB) {
// If this is the default destination from PTI, only the edges in TI
// that don't occur in PTI, or that branch to BB will be activated.
@@ -780,6 +895,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
else {
// The default destination is BB, we don't need explicit targets.
std::swap(PredCases[i], PredCases.back());
+
+ if (PredHasWeights) {
+ std::swap(Weights[i+1], Weights.back());
+ Weights.pop_back();
+ }
+
PredCases.pop_back();
--i; --e;
}
@@ -790,14 +911,34 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
PredDefault = BBDefault;
NewSuccessors.push_back(BBDefault);
}
+
+ if (SuccHasWeights) {
+ ScaleWeights(TI, Weights);
+ Weights.front() *= SuccDefaultWeight;
+ } else if (PredHasWeights) {
+ Weights.front() /= (1 + BBCases.size());
+ }
+
for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
if (!PTIHandled.count(BBCases[i].Value) &&
BBCases[i].Dest != BBDefault) {
PredCases.push_back(BBCases[i]);
NewSuccessors.push_back(BBCases[i].Dest);
+ if (SuccHasWeights) {
+ Weights.push_back(PredDefaultWeight *
+ GetWeight(TI, i)->getValue().getZExtValue());
+ } else if (PredHasWeights) {
+ // Split the old default's weight amongst the children
+ Weights.push_back(PredDefaultWeight / (1 + BBCases.size()));
+ }
}
} else {
+ // FIXME: preserve branch weight metadata, similarly to the 'then'
+ // above. For now, drop it.
+ PredHasWeights = false;
+ SuccHasWeights = false;
+
// If this is not the default destination from PSI, only the edges
// in SI that occur in PSI with a destination of BB will be
// activated.
@@ -822,7 +963,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// If there are any constants vectored to BB that TI doesn't handle,
// they must go to the default destination of TI.
- for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
+ for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
@@ -851,6 +992,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
NewSI->addCase(PredCases[i].Value, PredCases[i].Dest);
+ if (PredHasWeights || SuccHasWeights) {
+    // Halve the weights until all of them can fit in a uint32_t.
+ FitWeights(Weights);
+
+ SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
+
+ NewSI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(MDWeights));
+ }
+
EraseTerminatorInstAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
@@ -984,11 +1136,11 @@ HoistTerminator:
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
if (BB1V == BB2V) continue;
-
+
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
+ if (SI == 0)
SI = cast<SelectInst>
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName()+"."+BB2V->getName()));
@@ -1056,7 +1208,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// Do not hoist the instruction if any of its operands are defined but not
// used in this BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
if (OpI && OpI->getParent() == BIParent &&
@@ -1112,7 +1264,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
// as well.
if (PHIs.empty())
return false;
-
+
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
@@ -1162,13 +1314,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
unsigned Size = 0;
-
+
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (Size > 10) return false; // Don't clone large BB's.
++Size;
-
+
// We can only support instructions that do not define values that are
// live outside of the current basic block.
for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
@@ -1176,7 +1328,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB || isa<PHINode>(U)) return false;
}
-
+
// Looks ok, continue checking.
}
@@ -1194,31 +1346,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
// outside of the block.
if (!PN || PN->getParent() != BB || !PN->hasOneUse())
return false;
-
+
// Degenerate case of a single entry PHI.
if (PN->getNumIncomingValues() == 1) {
FoldSingleEntryPHINodes(PN->getParent());
- return true;
+ return true;
}
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
-
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
-
+
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
BasicBlock *PredBB = PN->getIncomingBlock(i);
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
-
+
if (RealDest == BB) continue; // Skip self loops.
// Skip if the predecessor's terminator is an indirect branch.
if (isa<IndirectBrInst>(PredBB->getTerminator())) continue;
-
+
// The dest block might have PHI nodes, other predecessors and other
// difficult cases. Instead of being smart about this, just insert a new
// block that jumps to the destination block, effectively splitting
@@ -1227,7 +1379,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
RealDest->getName()+".critedge",
RealDest->getParent(), RealDest);
BranchInst::Create(RealDest, EdgeBB);
-
+
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -1244,7 +1396,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
// Clone the instruction.
Instruction *N = BBI->clone();
if (BBI->hasName()) N->setName(BBI->getName()+".c");
-
+
// Update operands due to translation.
for (User::op_iterator i = N->op_begin(), e = N->op_end();
i != e; ++i) {
@@ -1252,7 +1404,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
if (PI != TranslateMap.end())
*i = PI->second;
}
-
+
// Check for trivial simplification.
if (Value *V = SimplifyInstruction(N, TD)) {
TranslateMap[BBI] = V;
@@ -1297,7 +1449,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// Don't bother if the branch will be constant folded trivially.
isa<ConstantInt>(IfCond))
return false;
-
+
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
// At some point this becomes non-profitable (particularly if the target
@@ -1307,14 +1459,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
if (NumPhis > 2)
return false;
-
+
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
-
+
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
if (Value *V = SimplifyInstruction(PN, TD)) {
@@ -1322,19 +1474,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
PN->eraseFromParent();
continue;
}
-
+
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
MaxCostVal0) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
MaxCostVal1))
return false;
}
-
+
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
if (PN == 0) return true;
-
+
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
if (PN->getType()->isIntegerTy(1) &&
@@ -1342,7 +1494,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
isa<BinaryOperator>(PN->getIncomingValue(1)) ||
isa<BinaryOperator>(IfCond)))
return false;
-
+
// If all PHI nodes are promotable, check to make sure that all
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
@@ -1362,7 +1514,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
IfBlock2 = 0;
} else {
@@ -1375,15 +1527,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
return false;
}
}
-
+
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
-
+
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
Instruction *InsertPt = DomBlock->getTerminator();
IRBuilder<true, NoFolder> Builder(InsertPt);
-
+
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
if (IfBlock1)
@@ -1394,19 +1546,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
DomBlock->getInstList().splice(InsertPt,
IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator());
-
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
-
- SelectInst *NV =
+
+ SelectInst *NV =
cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, ""));
PN->replaceAllUsesWith(NV);
NV->takeName(PN);
PN->eraseFromParent();
}
-
+
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
@@ -1420,14 +1572,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
/// to two returning blocks, try to merge them together into one return,
/// introducing a select if the return values disagree.
-static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
+static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
IRBuilder<> &Builder) {
assert(BI->isConditional() && "Must be a conditional branch");
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
-
+
// Check to ensure both blocks are empty (just a return) or optionally empty
// with PHI nodes. If there are other instructions, merging would cause extra
// computation on one path or the other.
@@ -1447,12 +1599,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
EraseTerminatorInstAndDCECond(BI);
return true;
}
-
+
// Otherwise, figure out what the true and false return values are
// so we can insert a new select instruction.
Value *TrueValue = TrueRet->getReturnValue();
Value *FalseValue = FalseRet->getReturnValue();
-
+
// Unwrap any PHI nodes in the return blocks.
if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
if (TVPN->getParent() == TrueSucc)
@@ -1460,7 +1612,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
if (FVPN->getParent() == FalseSucc)
FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
-
+
// In order for this transformation to be safe, we must be able to
// unconditionally execute both operands to the return. This is
// normally the case, but we could have a potentially-trapping
@@ -1472,12 +1624,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
if (FCV->canTrap())
return false;
-
+
// Okay, we collected all the mapped values and checked them for sanity, and
// defined to really do this transformation. First, update the CFG.
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
-
+
// Insert select instructions where needed.
Value *BrCond = BI->getCondition();
if (TrueValue) {
@@ -1491,15 +1643,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
}
}
- Value *RI = !TrueValue ?
+ Value *RI = !TrueValue ?
Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
(void) RI;
-
+
DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
<< "\n " << *BI << "NewRet = " << *RI
<< "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);
-
+
EraseTerminatorInstAndDCECond(BI);
return true;
@@ -1600,7 +1752,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (Cond == 0)
return false;
}
-
+
if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
@@ -1623,7 +1775,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
-
+
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
}
@@ -1631,13 +1783,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Only a single bonus inst is allowed.
if (&*FrontIt != Cond)
return false;
-
+
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
-
+
if (&*CondIt != BI)
return false;
@@ -1649,7 +1801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
if (CE->canTrap())
return false;
-
+
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
@@ -1659,22 +1811,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *PredBlock = *PI;
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
-
+
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
SmallVector<PHINode*, 4> PHIs;
if (PBI == 0 || PBI->isUnconditional() ||
- (BI->isConditional() &&
+ (BI->isConditional() &&
!SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
!isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
continue;
-
+
// Determine if the two branches share a common destination.
Instruction::BinaryOps Opc;
bool InvertPredCond = false;
-
+
if (BI->isConditional()) {
if (PBI->getSuccessor(0) == TrueDest)
Opc = Instruction::Or;
@@ -1693,7 +1845,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
- // must already have been resolved, so we won't be inhibiting the
+ // must already have been resolved, so we won't be inhibiting the
// out-of-order core by speculating them earlier.
if (BonusInst) {
// Collect the values used by the bonus inst
@@ -1707,47 +1859,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
-
+
// Walk up to four levels back up the use-def chain of the predecessor's
// terminator to see if all those values were used. The choice of four
// levels is arbitrary, to provide a compile-time-cost bound.
while (!Worklist.empty()) {
std::pair<Value*, unsigned> Pair = Worklist.back();
Worklist.pop_back();
-
+
if (Pair.second >= 4) continue;
UsedValues.erase(Pair.first);
if (UsedValues.empty()) break;
-
+
if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
- }
+ }
}
-
+
if (!UsedValues.empty()) return false;
}
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
- IRBuilder<> Builder(PBI);
+ IRBuilder<> Builder(PBI);
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
Value *NewCond = PBI->getCondition();
-
+
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
CmpInst *CI = cast<CmpInst>(NewCond);
CI->setPredicate(CI->getInversePredicate());
} else {
- NewCond = Builder.CreateNot(NewCond,
+ NewCond = Builder.CreateNot(NewCond,
PBI->getCondition()->getName()+".not");
}
-
+
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
-
+
// If we have a bonus inst, clone it into the predecessor block.
Instruction *NewBonus = 0;
if (BonusInst) {
@@ -1756,7 +1908,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
NewBonus->takeName(BonusInst);
BonusInst->setName(BonusInst->getName()+".old");
}
-
+
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *New = Cond->clone();
@@ -1764,9 +1916,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PredBlock->getInstList().insert(PBI, New);
New->takeName(Cond);
Cond->setName(New->getName()+".old");
-
+
if (BI->isConditional()) {
- Instruction *NewCond =
+ Instruction *NewCond =
cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(),
New, "or.cond"));
PBI->setCondition(NewCond);
@@ -1806,7 +1958,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::And,
PBI->getCondition(), New,
"and.cond"));
@@ -1814,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *NotCond =
cast<Instruction>(Builder.CreateNot(PBI->getCondition(),
"not.cond"));
- MergedCond =
+ MergedCond =
cast<Instruction>(Builder.CreateBinOp(Instruction::Or,
NotCond, MergedCond,
"or.cond"));
@@ -1921,7 +2073,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
I->clone()->insertBefore(PBI);
-
+
return true;
}
return false;
@@ -1936,7 +2088,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
BasicBlock *BB = BI->getParent();
// If this block ends with a branch instruction, and if there is a
- // predecessor that ends on a branch of the same condition, make
+ // predecessor that ends on a branch of the same condition, make
// this conditional branch redundant.
if (PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
@@ -1945,11 +2097,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue));
return true; // Nuke the branch on constant.
}
-
+
// Otherwise, if there are multiple predecessors, insert a PHI that merges
// in the constant and simplify the block result. Subsequent passes of
// simplifycfg will thread the block.
@@ -1969,18 +2121,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
+ NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
CondIsTrue), P);
} else {
NewPN->addIncoming(BI->getCondition(), P);
}
}
-
+
BI->setCondition(NewPN);
return true;
}
}
-
+
// If this is a conditional branch in an empty block, and if any
// predecessors is a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
@@ -1991,11 +2143,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
if (&*BBI != BI)
return false;
-
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
if (CE->canTrap())
return false;
-
+
int PBIOp, BIOp;
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
PBIOp = BIOp = 0;
@@ -2007,31 +2159,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PBIOp = BIOp = 1;
else
return false;
-
+
// Check to make sure that the other destination of this branch
// isn't BB itself. If so, this is an infinite loop that will
// keep getting unwound.
if (PBI->getSuccessor(PBIOp) == BB)
return false;
-
- // Do not perform this transformation if it would require
+
+ // Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
-
+
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin();
isa<PHINode>(II); ++II, ++NumPhis)
if (NumPhis > 2) // Disable this xform.
return false;
-
+
// Finally, if everything is ok, fold the branches to logical ops.
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
-
+
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
-
-
+
+
// If OtherDest *is* BB, then BB is a basic block with a single conditional
// branch in it, where one edge (OtherDest) goes back to itself but the other
// exits. We don't *know* that the program avoids the infinite loop
@@ -2046,13 +2198,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
"infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
OtherDest = InfLoopBlock;
- }
-
+ }
+
DEBUG(dbgs() << *PBI->getParent()->getParent());
// BI may have other predecessors. Because of this, we leave
// it alone, but modify PBI.
-
+
// Make sure we get to CommonDest on True&True directions.
Value *PBICond = PBI->getCondition();
IRBuilder<true, NoFolder> Builder(PBI);
@@ -2065,16 +2217,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Merge the conditions.
Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
-
+
// Modify PBI to branch on the new condition to the new dests.
PBI->setCondition(Cond);
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
-
+
// OtherDest may have phi nodes. If so, add entries from PBI's
// block that are identical to the entries for BI's block.
AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
-
+
// We know that the CommonDest already had an edge from PBI to
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
@@ -2092,10 +2244,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
PN->setIncomingValue(PBBIdx, NV);
}
}
-
+
DEBUG(dbgs() << "INTO: " << *PBI->getParent());
DEBUG(dbgs() << *PBI->getParent()->getParent());
-
+
// This basic block is probably dead. We know it has at least
// one fewer predecessor.
return true;
@@ -2214,7 +2366,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// br label %end
/// end:
/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
-///
+///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
@@ -2228,17 +2380,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
Value *V = ICI->getOperand(0);
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
-
+
// The pattern we're looking for is where our only predecessor is a switch on
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
-
+
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
return false;
-
+
// If BB is reachable on a non-default case, then we simply know the value of
// V in this block. Substitute it and constant fold the icmp instruction
// away.
@@ -2246,7 +2398,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
ConstantInt *VVal = SI->findCaseDest(BB);
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
-
+
if (Value *V = SimplifyInstruction(ICI, TD)) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
@@ -2254,7 +2406,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
@@ -2264,13 +2416,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
V = ConstantInt::getFalse(BB->getContext());
else
V = ConstantInt::getTrue(BB->getContext());
-
+
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
return SimplifyCFG(BB) | true;
}
-
+
// The use of the icmp has to be in the 'end' block, by the only PHI node in
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
@@ -2297,7 +2449,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
BB->getParent(), BB);
SI->addCase(Cst, NewBB);
-
+
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
@@ -2313,8 +2465,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
-
-
+
+
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
@@ -2323,7 +2475,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
bool TrueWhenEqual = true;
Value *ExtraCase = 0;
unsigned UsedICmps = 0;
-
+
if (Cond->getOpcode() == Instruction::Or) {
CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
UsedICmps);
@@ -2332,7 +2484,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
UsedICmps);
TrueWhenEqual = false;
}
-
+
// If we didn't have a multiply compared value, fail.
if (CompVal == 0) return false;
@@ -2344,21 +2496,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
// instruction can't handle, remove them now.
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
+
// If Extra was used, we require at least two switch values to do the
// transformation. A switch with one value is just a cond branch.
if (ExtraCase && Values.size() < 2) return false;
-
+
+ // TODO: Preserve branch weight metadata, similarly to how
+ // FoldValueComparisonIntoPredecessors preserves it.
+
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
+
BasicBlock *BB = BI->getParent();
-
+
DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n" << *BB);
-
+
// If there are any extra values that couldn't be folded into the switch
// then we evaluate them with an explicit branch first. Split the block
// right before the condbr to handle it.
@@ -2372,13 +2527,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
else
Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
-
+
OldTI->eraseFromParent();
-
+
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
AddPredecessorToBlock(EdgeBB, BB, NewBB);
-
+
DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
<< "\nEXTRABB = " << *BB);
BB = NewBB;
@@ -2392,14 +2547,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
TD->getIntPtrType(CompVal->getContext()),
"magicptr");
}
-
+
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
// Add all of the 'cases' to the switch instruction.
for (unsigned i = 0, e = Values.size(); i != e; ++i)
New->addCase(Values[i], EdgeBB);
-
+
// We added edges from PI to the EdgeBB. As such, if there were any
// PHI nodes in EdgeBB, they need entries to be added corresponding to
// the number of edges added.
@@ -2410,10 +2565,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
PN->addIncoming(InVal, BB);
}
-
+
// Erase the old branch instruction.
EraseTerminatorInstAndDCECond(BI);
-
+
DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
}
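For reference, a hypothetical source-level shape this handles:

    // 'if (x == 0 || x == 1 || x == 7) f(); else g();' becomes a switch
    // on x with cases 0, 1 and 7 leading to the f() block and a default
    // edge to the g() block; TrueWhenEqual picks which block is which.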
@@ -2467,7 +2622,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
-
+
// Find predecessors that end with branches.
SmallVector<BasicBlock*, 8> UncondBranchPreds;
SmallVector<BranchInst*, 8> CondBranchPreds;
@@ -2481,7 +2636,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
CondBranchPreds.push_back(BI);
}
}
-
+
// If we found some, do the transformation!
if (!UncondBranchPreds.empty() && DupRet) {
while (!UncondBranchPreds.empty()) {
@@ -2490,21 +2645,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
<< "INTO UNCOND BRANCH PRED: " << *Pred);
(void)FoldReturnIntoUncondBranch(RI, BB, Pred);
}
-
+
// If we eliminated all predecessors of the block, delete the block now.
if (pred_begin(BB) == pred_end(BB))
// We know there are no successors, so just nuke the block.
BB->eraseFromParent();
-
+
return true;
}
-
+
// Check out all of the conditional branches going to this return
// instruction. If any of them just select between returns, change the
// branch itself into a select/return pair.
while (!CondBranchPreds.empty()) {
BranchInst *BI = CondBranchPreds.pop_back_val();
-
+
// Check to see if the non-BB successor is also a return block.
if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
@@ -2516,9 +2671,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
-
+
bool Changed = false;
-
+
// If there are any instructions immediately before the unreachable that can
// be removed, do so.
while (UI != BB->begin()) {
@@ -2558,11 +2713,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BBI->eraseFromParent();
Changed = true;
}
-
+
// If the unreachable instruction is the first in the block, take a gander
// at all of the predecessors of this instruction, and simplify them.
if (&BB->front() != UI) return Changed;
-
+
SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
TerminatorInst *TI = Preds[i]->getTerminator();
@@ -2615,7 +2770,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *MaxBlock = 0;
for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
+ if (I->second.first > MaxPop ||
(I->second.first == MaxPop && MaxIndex > I->second.second)) {
MaxPop = I->second.first;
MaxIndex = I->second.second;
@@ -2627,13 +2782,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// edges to it.
SI->setDefaultDest(MaxBlock);
Changed = true;
-
+
// If MaxBlock has phinodes in it, remove MaxPop-1 entries from
// it.
if (isa<PHINode>(MaxBlock->begin()))
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
-
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i)
if (i.getCaseSuccessor() == MaxBlock) {
@@ -2648,7 +2803,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// place to note that the call does not throw though.
BranchInst *BI = Builder.CreateBr(II->getNormalDest());
II->removeFromParent(); // Take out of symbol table
-
+
// Insert the call now...
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
@@ -2663,7 +2818,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
}
-
+
// If this block is now dead, remove it.
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock()) {
@@ -2823,6 +2978,285 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
return Changed;
}
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return CE->isGEPWithNoNotionalOverIndexing();
+
+ return isa<ConstantFP>(C) ||
+ isa<ConstantInt>(C) ||
+ isa<ConstantPointerNull>(C) ||
+ isa<GlobalValue>(C) ||
+ isa<UndefValue>(C);
+}
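Hedged examples of what this predicate accepts, written as IR-level
constants:

    // Accepted: i32 7, double 0.5, null, @some_global, undef, and
    // constant GEPs with no notional over-indexing, e.g.
    //   getelementptr ([4 x i8]* @str, i32 0, i32 0)
    // Other constants (aggregates, casts and other non-GEP constant
    // expressions) are rejected on the conservative assumption that the
    // backend may not support them as table initializer elements.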
+
+/// GetCaseResults - Try to determine the resulting constant values in phi
+/// nodes at the common destination basic block for one of the case
+/// destinations of a switch instruction.
+static bool GetCaseResults(SwitchInst *SI,
+ BasicBlock *CaseDest,
+ BasicBlock **CommonDest,
+ SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+ // The block from which we enter the common destination.
+ BasicBlock *Pred = SI->getParent();
+
+ // If CaseDest is empty, continue to its successor.
+ if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() &&
+ !isa<PHINode>(CaseDest->begin())) {
+
+ TerminatorInst *Terminator = CaseDest->getTerminator();
+ if (Terminator->getNumSuccessors() != 1)
+ return false;
+
+ Pred = CaseDest;
+ CaseDest = Terminator->getSuccessor(0);
+ }
+
+ // If we did not have a CommonDest before, use the current one.
+ if (!*CommonDest)
+ *CommonDest = CaseDest;
+ // If the destination isn't the common one, abort.
+ if (CaseDest != *CommonDest)
+ return false;
+
+ // Get the values for this case from phi nodes in the destination block.
+ BasicBlock::iterator I = (*CommonDest)->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(Pred);
+ if (Idx == -1)
+ continue;
+
+ Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
+ if (!ConstVal)
+ return false;
+
+ // Be conservative about which kinds of constants we support.
+ if (!ValidLookupTableConstant(ConstVal))
+ return false;
+
+ Res.push_back(std::make_pair(PHI, ConstVal));
+ }
+
+ return true;
+}
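The CFG shape this helper matches, sketched as hypothetical IR; an empty
case block is looked through to its single successor:

    // case.bb:                          ; preds = %switch.bb
    //   br label %common
    // common:
    //   %res = phi i32 [ 1, %case.bb ], [ 0, %switch.bb ], ...
    // Each (phi node, incoming constant) pair found is appended to Res.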
+
+/// BuildLookupTable - Build a lookup table with the contents of Results, using
+/// DefaultResult to fill the holes in the table. If the table ends up
+/// containing the same result in each element, set *SingleResult to that value
+/// and return NULL.
+static GlobalVariable *BuildLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results,
+ Constant *DefaultResult,
+ Constant **SingleResult) {
+ assert(Results.size() && "Need values to build lookup table");
+ assert(TableSize >= Results.size() && "Table needs to hold all values");
+
+ // If all values in the table are equal, this is that value.
+ Constant *SameResult = Results.begin()->second;
+
+ // Build up the table contents.
+ std::vector<Constant*> TableContents(TableSize);
+ for (size_t I = 0, E = Results.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Results[I].first;
+ Constant *CaseRes = Results[I].second;
+
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ TableContents[Idx] = CaseRes;
+
+ if (CaseRes != SameResult)
+ SameResult = NULL;
+ }
+
+ // Fill in any holes in the table with the default result.
+ if (Results.size() < TableSize) {
+ for (unsigned i = 0; i < TableSize; ++i) {
+ if (!TableContents[i])
+ TableContents[i] = DefaultResult;
+ }
+
+ if (DefaultResult != SameResult)
+ SameResult = NULL;
+ }
+
+ // Same result was used in the entire table; just return that.
+ if (SameResult) {
+ *SingleResult = SameResult;
+ return NULL;
+ }
+
+ ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
+ Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+ GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+ IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+ // FIXME: Handle unreachable cases.
+
+ // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+ // split off a dense part and build a lookup table for that.
+
+ // FIXME: If the results are all integers and the lookup table would fit in a
+ // target-legal register, we should store them as a bitmap and use shift/mask
+ // to look up the result.
+
+ // FIXME: This creates arrays of GEPs to constant strings, which means each
+ // GEP needs a runtime relocation in PIC code. We should just build one big
+ // string and lookup indices into that.
+
+  // Ignore the switch if the number of cases is too small.
+ // This is similar to the check when building jump tables in
+ // SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Determine the best cut-off.
+ if (SI->getNumCases() < 4)
+ return false;
+
+  // Figure out the corresponding result for each case value and phi node in
+  // the common destination, as well as the min and max case values.
+ assert(SI->case_begin() != SI->case_end());
+ SwitchInst::CaseIt CI = SI->case_begin();
+ ConstantInt *MinCaseVal = CI.getCaseValue();
+ ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+ BasicBlock *CommonDest = NULL;
+ typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
+ SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+ SmallDenseMap<PHINode*, Constant*> DefaultResults;
+ SmallDenseMap<PHINode*, Type*> ResultTypes;
+ SmallVector<PHINode*, 4> PHIs;
+
+ for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+ ConstantInt *CaseVal = CI.getCaseValue();
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = CaseVal;
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+ MaxCaseVal = CaseVal;
+
+ // Resulting value at phi nodes for this case value.
+ typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+ ResultsTy Results;
+ if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results))
+ return false;
+
+ // Append the result from this case to the list for each phi.
+    for (ResultsTy::iterator I = Results.begin(), E = Results.end();
+         I != E; ++I) {
+ if (!ResultLists.count(I->first))
+ PHIs.push_back(I->first);
+ ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ }
+ }
+
+ // Get the resulting values for the default case.
+ SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+  if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest,
+                      DefaultResultsList))
+ return false;
+ for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+ PHINode *PHI = DefaultResultsList[I].first;
+ Constant *Result = DefaultResultsList[I].second;
+ DefaultResults[PHI] = Result;
+ ResultTypes[PHI] = Result->getType();
+ }
+
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+  // The table density should be at least 40%. This is the same criterion as
+  // for jump tables; see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+  // Be careful to avoid overflow in the density computation.
+ if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
+ return false;
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ if (SI->getNumCases() * 10 < TableSize * 4)
+ return false;
+
+ // Build the lookup tables.
+ SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
+ SmallDenseMap<PHINode*, Constant*> SingleResults;
+
+ Module &Mod = *CommonDest->getParent()->getParent();
+ for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
+ I != E; ++I) {
+ PHINode *PHI = *I;
+
+ Constant *SingleResult = NULL;
+ LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal,
+ ResultLists[PHI], DefaultResults[PHI],
+ &SingleResult);
+ SingleResults[PHI] = SingleResult;
+ }
+
+ // Create the BB that does the lookups.
+ BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Check whether the condition value is within the case range, and branch to
+ // the new BB.
+ Builder.SetInsertPoint(SI);
+ Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Populate the BB that does the lookups.
+ Builder.SetInsertPoint(LookupBB);
+ bool ReturnedEarly = false;
+ for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
+ I != E; ++I) {
+ PHINode *PHI = *I;
+ // There was a single result for this phi; just use that.
+ if (Constant *SingleResult = SingleResults[PHI]) {
+ PHI->addIncoming(SingleResult, LookupBB);
+ continue;
+ }
+
+ Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
+ Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
+ "switch.gep");
+ Value *Result = Builder.CreateLoad(GEP, "switch.load");
+
+ // If the result is only going to be used to return from the function,
+ // we want to do that right here.
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) {
+ if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) {
+ Builder.CreateRet(Result);
+ ReturnedEarly = true;
+ }
+ }
+
+ if (!ReturnedEarly)
+ PHI->addIncoming(Result, LookupBB);
+ }
+
+ if (!ReturnedEarly)
+ Builder.CreateBr(CommonDest);
+
+ // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+ if (Succ == SI->getDefaultDest()) continue;
+ Succ->removePredecessor(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ ++NumLookupTables;
+ return true;
+}
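As a source-level illustration, a hypothetical C++ function whose switch
this transformation can now replace with a table lookup (four cases over
the range [0,3], so the size and density checks above both pass):

    // Conceptually becomes: return (unsigned)x < 4u ? Table[x] : 35;
    // where Table is the private constant {10, 4, 7, 12} emitted by
    // BuildLookupTable.
    int f(int x) {
      switch (x) {
      case 0: return 10;
      case 1: return 4;
      case 2: return 7;
      case 3: return 12;
      default: return 35;
      }
    }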
+
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If this switch is too complex to want to look at, ignore it.
if (!isValueEqualityComparison(SI))
@@ -2862,13 +3296,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder))
+ return SimplifyCFG(BB) | true;
+
return false;
}
bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
BasicBlock *BB = IBI->getParent();
bool Changed = false;
-
+
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
@@ -2879,7 +3316,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
--i; --e;
Changed = true;
}
- }
+ }
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
@@ -2887,14 +3324,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
EraseTerminatorInstAndDCECond(IBI);
return true;
}
-
+
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
return SimplifyCFG(BB) | true;
@@ -2904,13 +3341,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
-
+
// If the Terminator is the only non-phi instruction, simplify the block.
BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
-
+
// If the only instruction in the block is a seteq/setne comparison
// against a constant, try to simplify the block.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
@@ -2921,7 +3358,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
return true;
}
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
@@ -2934,7 +3371,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
-
+
// Conditional branch
if (isValueEqualityComparison(BI)) {
// If we only have one predecessor, and if it is a branch on this value,
@@ -2943,7 +3380,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
return SimplifyCFG(BB) | true;
-
+
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
BasicBlock::iterator I = BB->begin();
@@ -2962,17 +3399,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return SimplifyCFG(BB) | true;
}
}
-
+
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
if (SimplifyBranchOnICmpChain(BI, TD, Builder))
return true;
-
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
return SimplifyCFG(BB) | true;
-
+
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
@@ -2999,14 +3436,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
return SimplifyCFG(BB) | true;
}
-
+
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, TD))
return SimplifyCFG(BB) | true;
-
+
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
@@ -3114,7 +3551,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
//
if (MergeBlockIntoPredecessor(BB))
return true;
-
+
IRBuilder<> Builder(BB);
// If there is a trivial two-entry PHI node in this basic block, and we can
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 81eb9e0..528e6a1 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -72,7 +72,7 @@ namespace {
++NumSimplified;
Changed = true;
}
- Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
}
// Place the list of instructions to simplify on the next loop iteration
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 62d23cb..c09dcd2 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -601,7 +601,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, TD);
+ (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo());
return true;
}