Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp      |   7
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                 | 443
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp      |   2
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp        |  43
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  |  99
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp     |   8
6 files changed, 363 insertions, 239 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 615c517..f0d29c8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
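
The DenseMap-to-ValueMap swap above works because ValueMap keys are tracked through value handles: when the keyed Value is deleted or replaced, its entry is dropped or re-keyed automatically, which is why the manual "SunkAddrs[Addr] = 0" reset can go away. A rough, LLVM-free sketch of that key-tracking idea (TrackedValue and TrackedMap are invented names for illustration, not the real ValueMap API):

#include <cstdio>
#include <map>

struct TrackedValue;

// Toy registry: a map whose entries vanish when the key object dies. This is
// (very roughly) what value-handle keys buy over a plain DenseMap<Value*, ...>.
struct TrackedMap {
  std::map<TrackedValue *, int> Data;
  void onDeleted(TrackedValue *V) { Data.erase(V); }
};

struct TrackedValue {
  TrackedMap *Owner = nullptr;
  ~TrackedValue() {
    if (Owner)
      Owner->onDeleted(this);          // notify the map; the entry is dropped
  }
};

int main() {
  TrackedMap M;
  {
    TrackedValue V;
    V.Owner = &M;
    M.Data[&V] = 42;
    std::printf("entries while V is alive: %zu\n", M.Data.size());  // 1
  }                                    // V destroyed here
  std::printf("entries after V is gone: %zu\n", M.Data.size());     // 0
  return 0;
}
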
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 129af8d..996996d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -45,6 +45,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -498,6 +499,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
class GVN : public FunctionPass {
bool NoLoads;
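
AvailableValueInBlock above relies on PointerIntPair to pack the value pointer and its 2-bit ValType tag into a single word, using the low bits freed by pointer alignment. A minimal stand-alone sketch of that packing (toy Value and TaggedValue types, not the real llvm::PointerIntPair interface):

#include <cassert>
#include <cstdint>

// The pointee's alignment guarantees the two low address bits are zero, so a
// 2-bit tag (SimpleVal / LoadVal / MemIntrin above) can live in them.
enum ValType : std::uintptr_t { SimpleVal = 0, LoadVal = 1, MemIntrin = 2 };

struct alignas(4) Value { int Dummy; };   // >= 4-byte alignment frees 2 bits

class TaggedValue {
  std::uintptr_t Bits = 0;

public:
  void setPointer(Value *V) {
    std::uintptr_t P = reinterpret_cast<std::uintptr_t>(V);
    assert((P & 3u) == 0 && "pointer must be 4-byte aligned");
    Bits = P | (Bits & 3u);               // keep the existing tag
  }
  void setInt(ValType T) { Bits = (Bits & ~std::uintptr_t(3)) | T; }
  Value *getPointer() const {
    return reinterpret_cast<Value *>(Bits & ~std::uintptr_t(3));
  }
  ValType getInt() const { return static_cast<ValType>(Bits & 3u); }
};

int main() {
  Value V;
  TaggedValue TV;
  TV.setPointer(&V);
  TV.setInt(LoadVal);
  assert(TV.getPointer() == &V && TV.getInt() == LoadVal);
  return 0;
}
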
@@ -519,6 +589,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +674,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+ // Helper functions for redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -612,6 +693,7 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
@@ -1159,114 +1241,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1297,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1465,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1487,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1555,7 +1515,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
+ SmallVector<BasicBlock *, 4> CriticalEdgePred;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1578,20 +1538,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return false;
}
- unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
- NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ CriticalEdgePred.push_back(Pred);
}
}
- if (!NeedToSplit.empty()) {
- toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
- return false;
- }
-
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
- "Fully available value should be eliminated above!");
+ "Fully available value should already be eliminated!");
// If this load is unavailable in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
@@ -1600,6 +1554,17 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (NumUnavailablePreds != 1)
return false;
+ // Split critical edges, and update the unavailable predecessors accordingly.
+ for (SmallVector<BasicBlock *, 4>::iterator I = CriticalEdgePred.begin(),
+ E = CriticalEdgePred.end(); I != E; I++) {
+ BasicBlock *OrigPred = *I;
+ BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
+ PredLoads.erase(OrigPred);
+ PredLoads[NewPred] = 0;
+ DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
+ << LoadBB->getName() << '\n');
+ }
+
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
@@ -1615,13 +1580,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1592,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1659,7 +1601,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- return false;
+ // HINT: Don't revert the edge-splitting, as the following transformation
+ // may also need to split these critical edges.
+ return !CriticalEdgePred.empty();
}
// Okay, we can eliminate this load by inserting a reload in the predecessor
@@ -1714,6 +1658,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load.
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate full redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
@@ -2296,8 +2306,6 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
- if (splitCriticalEdges())
- ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
@@ -2309,6 +2317,7 @@ bool GVN::runOnFunction(Function& F) {
Changed |= PREChanged;
}
}
+
// FIXME: Should perform GVN again after PRE does something. PRE can move
// computations into blocks where they become fully redundant. Note that
// we can't do this until PRE's critical edge splitting updates memdep.
@@ -2542,6 +2551,15 @@ bool GVN::performPRE(Function &F) {
return Changed;
}
+/// Split the critical edge connecting the given two blocks, and return
+/// the block inserted on the critical edge.
+BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+ BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ if (MD)
+ MD->invalidateCachedPredecessors();
+ return BB;
+}
+
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
@@ -2568,9 +2586,18 @@ bool GVN::iterateOnFunction(Function &F) {
RE = RPOT.end(); RI != RE; ++RI)
Changed |= processBlock(*RI);
#else
+ // Save the blocks this function has before the transformation begins. GVN
+ // may split critical edges, and hence may invalidate the RPO/DT iterators.
+ //
+ std::vector<BasicBlock *> BBVect;
+ BBVect.reserve(256);
for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- Changed |= processBlock(DI->getBlock());
+ BBVect.push_back(DI->getBlock());
+
+ for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
+ I != E; I++)
+ Changed |= processBlock(*I);
#endif
return Changed;
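
To make the new PerformLoadPRE path easier to follow, here is a hedged source-level picture of the partial redundancy it targets: the loaded value is available along one incoming path but not the other, so a load is inserted in the predecessor where it is missing (after splitting the edge if it is critical) and the results are merged with a PHI. The beforePRE/afterPRE functions are illustrative only; the real transformation runs on LLVM IR:

// Partially redundant load: the value is known along the "c" path (from the
// store) but unknown along the other path.
int beforePRE(bool c, int *p) {
  if (c)
    *p = 1;
  return *p;            // load is redundant only when c is true
}

// What load PRE effectively produces: a load in the predecessor where the
// value was unavailable, and a merge (PHI) at the original block.
int afterPRE(bool c, int *p) {
  int v;
  if (c) {
    *p = 1;
    v = 1;              // value forwarded from the store
  } else {
    v = *p;             // new load on the (possibly split) predecessor edge
  }
  return v;             // PHI of the two available values
}

When the only unavailable predecessor reaches the block over a critical edge, the code above now splits that edge in place and keeps the split even if PRE later bails out, which is what the HINT comment and the "return !CriticalEdgePred.empty();" change are about.
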
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 8e76c78..df11e92 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L))
+ if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
continue;
// Computing the value outside of the loop brings no benefit if :
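
For context, RewriteLoopExitValues replaces a value computed by the loop but used only after it with a closed-form expression at the exit; the added isSafeToExpand guard skips the rewrite when materializing that expression could itself be unsafe (for example, when the SCEV contains a division that might trap -- a hedged reading of the guard's intent). A source-level illustration with invented function names:

// What exit-value rewriting does: a value the loop computes but only uses
// afterwards is replaced by its closed form at the exit.
int beforeRewrite(int n) {
  int i = 0;
  while (i < n)
    ++i;
  return i;                 // exit value of i
}

int afterRewrite(int n) {
  return n > 0 ? n : 0;     // closed-form exit value; the loop becomes dead
}
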
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
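
The SimplifiedLatch plumbing is easier to see against what rotation does in the first place: a top-tested loop becomes a guarded, bottom-tested one, the canonical form later passes expect. A hypothetical source-level sketch (work() is a stand-in for the loop body):

void work(int);          // stand-in for the loop body

void topTested(int n) {  // header exits; latch is an unconditional branch
  int i = 0;
  while (i < n) {
    work(i);
    ++i;
  }
}

void rotated(int n) {    // after rotation: guard + do-while; latch now exits
  int i = 0;
  if (i < n) {
    do {
      work(i);
      ++i;
    } while (i < n);
  }
}

Folding the latch in simplifyLoopLatch can itself leave the latch exiting, so rotateLoop now accepts one rotation in that state (SimplifiedLatch true) and otherwise keeps treating latch-exiting loops as already rotated, as the new doc comment explains.
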
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73e44d7..b107fef 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -774,6 +774,16 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
}
namespace {
+class LSRUse;
+}
+// Check if it is legal to fold 2 base registers.
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F);
+// Get the cost of the scaling factor used in F for LU.
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
+
+namespace {
/// Cost - This class is used to measure and compare candidate formulae.
class Cost {
@@ -785,11 +795,12 @@ class Cost {
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
+ unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0) {}
+ SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
@@ -799,9 +810,9 @@ public:
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
- | ImmCost | SetupCost) != ~0u)
+ | ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
- & ImmCost & SetupCost) == ~0u);
+ & ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
@@ -810,12 +821,14 @@ public:
return NumRegs == ~0u;
}
- void RateFormula(const Formula &F,
+ void RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
@@ -900,12 +913,14 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
}
-void Cost::RateFormula(const Formula &F,
+void Cost::RateFormula(const TargetTransformInfo &TTI,
+ const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
+ const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
@@ -932,7 +947,12 @@ void Cost::RateFormula(const Formula &F,
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
if (NumBaseParts > 1)
- NumBaseAdds += NumBaseParts - 1;
+ // Do not count the base and a possible second register if the target
+ // allows folding 2 registers.
+ NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+
+ // Accumulate non-free scaling amounts.
+ ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
@@ -955,6 +975,7 @@ void Cost::Loose() {
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
+ ScaleCost = ~0u;
}
/// operator< - Choose the lower cost.
@@ -967,6 +988,8 @@ bool Cost::operator<(const Cost &Other) const {
return NumIVMuls < Other.NumIVMuls;
if (NumBaseAdds != Other.NumBaseAdds)
return NumBaseAdds < Other.NumBaseAdds;
+ if (ScaleCost != Other.ScaleCost)
+ return ScaleCost < Other.ScaleCost;
if (ImmCost != Other.ImmCost)
return ImmCost < Other.ImmCost;
if (SetupCost != Other.SetupCost)
@@ -983,6 +1006,8 @@ void Cost::print(raw_ostream &OS) const {
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
+ if (ScaleCost != 0)
+ OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
@@ -1359,6 +1384,58 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
+static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
+ const Formula &F) {
+ // If F is used as an addressing mode, it may fold one base plus one
+ // scaled register. If the scaled register is nil, treat another element
+ // of the base regs as a 1-scaled register instead.
+ // This is possible only if BaseRegs has at least 2 registers.
+
+ // If this is not an address calculation, this is not an addressing mode
+ // use.
+ if (LU.Kind != LSRUse::Address)
+ return false;
+
+ // F is already scaled.
+ if (F.Scale != 0)
+ return false;
+
+ // We need to keep one register for the base and one to scale.
+ if (F.BaseRegs.size() < 2)
+ return false;
+
+ return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
+}
+
+static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ if (!F.Scale)
+ return 0;
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F) && "Illegal formula in use.");
+
+ switch (LU.Kind) {
+ case LSRUse::Address: {
+ int CurScaleCost = TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+ assert(CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!");
+ return CurScaleCost;
+ }
+ case LSRUse::ICmpZero:
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
+ // Therefore, return 0 in case F.Scale == -1.
+ return F.Scale != -1;
+
+ case LSRUse::Basic:
+ case LSRUse::Special:
+ return 0;
+ }
+
+ llvm_unreachable("Invalid LSRUse Kind!");
+}
+
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
@@ -3607,7 +3684,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.BaseOffset))
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -3690,7 +3767,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
@@ -3726,7 +3803,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Cost CostBest;
Regs.clear();
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
+ DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
@@ -4079,7 +4157,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+ NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
+ LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
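
The new ScaleCost term and isLegal2RegAMUse both model what a target's addressing modes absorb for free. At the source level, a formula's scaled register usually corresponds to an index multiplied by the access size, as in this illustrative (not LSR-specific) snippet:

int sumArray(const int *Base, int N) {
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += Base[I];        // address is Base + I*sizeof(int): one base register
                         // plus one scaled index register
  return S;
}

Whether that multiply is free is what TTI.getScalingFactorCost now feeds into the cost model, and a formula with two plain base registers may still fold on targets whose address mode takes base plus index, which is the case isLegal2RegAMUse checks.
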
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be0f0e8..c325925 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -626,8 +626,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ if (!StructTy->isSized()) {
+ // The call may never return and hence the copy may never be executed,
+ // so it is not safe to conclude that the destination has at least
+ // <cpyLen> bytes, as the copy instruction would otherwise imply.
+ return false;
+ }
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
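
The isSized() guard matters because performCallSlotOptzn rewrites a call that fills a temporary so that it writes the memcpy destination directly, which is only sound when the destination is known to cover the whole copy; an unsized pointee type gives no such bound. A rough source-level picture, with hypothetical S/fill names:

#include <cstring>

struct S { int Data[4]; };
void fill(S *Out);                       // callee writes all of *Out

void beforeOpt(S *Dst) {
  S Tmp;
  fill(&Tmp);                            // call writes a temporary
  std::memcpy(Dst, &Tmp, sizeof(S));     // the temporary is then copied out
}

void afterOpt(S *Dst) {
  fill(Dst);                             // call-slot optimization: write Dst
                                         // directly and drop the memcpy; only
                                         // sound if Dst holds >= sizeof(S) bytes
}
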