aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt5
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp471
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp70
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp825
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp470
-rw-r--r--lib/Transforms/Scalar/GEPSplitter.cpp83
-rw-r--r--lib/Transforms/Scalar/GVN.cpp703
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp25
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp849
-rw-r--r--lib/Transforms/Scalar/LICM.cpp283
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp34
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp606
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp170
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp460
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp71
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp18
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp42
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp735
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp32
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp27
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp8
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp1152
-rw-r--r--lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp159
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp1630
-rw-r--r--lib/Transforms/Scalar/TailDuplication.cpp12
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp133
26 files changed, 5214 insertions, 3859 deletions
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 3f4ceb4..fcf914f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -6,12 +6,14 @@ add_llvm_library(LLVMScalarOpts
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
- GEPSplitter.cpp
+ EarlyCSE.cpp
GVN.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
+ LoopIdiomRecognize.cpp
+ LoopInstSimplify.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
@@ -24,7 +26,6 @@ add_llvm_library(LLVMScalarOpts
Scalar.cpp
ScalarReplAggregates.cpp
SimplifyCFGPass.cpp
- SimplifyHalfPowrLibCalls.cpp
SimplifyLibCalls.cpp
Sink.cpp
TailDuplication.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 35d02d9..887fa9f 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,6 +22,8 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -40,26 +42,40 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;
-STATISTIC(NumElim, "Number of blocks eliminated");
-
-static cl::opt<bool>
-CriticalEdgeSplit("cgp-critical-edge-splitting",
- cl::desc("Split critical edges during codegen prepare"),
- cl::init(false), cl::Hidden);
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ DominatorTree *DT;
ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
+
+ // Keeps track of non-local addresses that have been sunk into a block. This
+ // allows us to avoid inserting duplicate code for blocks with multiple
+ // load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
- /// BackEdges - Keep a set of all the loop back edges.
- ///
- SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -69,26 +85,21 @@ namespace {
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
- virtual void releaseMemory() {
- BackEdges.clear();
- }
-
private:
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs);
- bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
- void findLoopBackEdges(const Function &F);
};
}
@@ -100,27 +111,15 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
}
-/// findLoopBackEdges - Do a DFS walk to find loop back edges.
-///
-void CodeGenPrepare::findLoopBackEdges(const Function &F) {
- SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
- FindFunctionBackedges(F, Edges);
-
- BackEdges.insert(Edges.begin(), Edges.end());
-}
-
-
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
- // Now find loop back edges.
- findLoopBackEdges(F);
-
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
@@ -128,6 +127,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= OptimizeBlock(*BB);
EverMadeChange |= MadeChange;
}
+
+ SunkAddrs.clear();
+
return EverMadeChange;
}
@@ -302,120 +304,23 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
+ if (DT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
if (PFI) {
PFI->replaceAllUses(BB, DestBB);
PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
- ++NumElim;
+ ++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
-/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
-/// lives in to see if there is a block that we can reuse as a critical edge
-/// from TIBB.
-static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
- BasicBlock *Dest = DestPHI->getParent();
-
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
-
- /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
- unsigned TIBBEntryNo = 0;
-
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
- BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional())
- continue;
- // Must be empty other than the branch and debug info.
- BasicBlock::iterator I = Pred->begin();
- while (isa<DbgInfoIntrinsic>(I))
- I++;
- if (&*I != PredBr)
- continue;
- // Cannot be the entry block; its label does not get emitted.
- if (Pred == &Dest->getParent()->getEntryBlock())
- continue;
-
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same effect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- unsigned PredEntryNo = pi;
-
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size()) {
- if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
- TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
- TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
- }
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (PN->getIncomingBlock(PredEntryNo) != Pred)
- PredEntryNo = PN->getBasicBlockIndex(Pred);
-
- if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
- FoundMatch = false;
- break;
- }
- }
-
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch)
- return Pred;
- }
- return 0;
-}
-
-
-/// SplitEdgeNicely - Split the critical edge from TI to its specified
-/// successor if it will improve codegen. We only do this if the successor has
-/// phi nodes (otherwise critical edges are ok). If there is already another
-/// predecessor of the succ that is empty (and thus has no phi nodes), use it
-/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
- SmallSet<std::pair<const BasicBlock*,
- const BasicBlock*>, 8> &BackEdges,
- Pass *P) {
- BasicBlock *TIBB = TI->getParent();
- BasicBlock *Dest = TI->getSuccessor(SuccNum);
- assert(isa<PHINode>(Dest->begin()) &&
- "This should only be called if Dest has a PHI!");
- PHINode *DestPHI = cast<PHINode>(Dest->begin());
-
- // Do not split edges to EH landing pads.
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
- if (Invoke->getSuccessor(1) == Dest)
- return;
-
- // As a hack, never split backedges of loops. Even though the copy for any
- // PHIs inserted on the backedge would be dead for exits from the loop, we
- // assume that the cost of *splitting* the backedge would be too high.
- if (BackEdges.count(std::make_pair(TIBB, Dest)))
- return;
-
- if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
- ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PFI)
- PFI->splitEdge(TIBB, Dest, ReuseBB);
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, ReuseBB);
- return;
- }
-
- SplitCriticalEdge(TI, SuccNum, P, true);
-}
-
-
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
@@ -486,6 +391,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ ++NumCastUses;
}
// If we removed all uses, nuke the cast.
@@ -543,6 +449,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ ++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
@@ -569,14 +476,45 @@ protected:
} // end anonymous namespace
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+ // If we found an inline asm expession, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
- CI->replaceAllUsesWith(RetVal);
- CI->eraseFromParent();
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
return true;
}
@@ -594,6 +532,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
CodeGenPrepareFortifiedLibCalls Simplifier;
return Simplifier.fold(CI, TD);
}
+
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -616,13 +555,85 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs) {
- // Figure out what addressing mode will be built up for this operation.
+ const Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
- AddrModeInsts, *TLI);
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (Visited.count(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ Visited.insert(V);
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+ AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
@@ -725,25 +736,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
- MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
- if (Addr->use_empty()) {
- RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ if (Repl->use_empty()) {
+ RecursivelyDeleteTriviallyDeadInstructions(Repl);
// This address is now available for reassignment, so erase the table entry;
// we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
}
+ ++NumMemoryInsts;
return true;
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs) {
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(CS);
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -753,8 +765,8 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
- Value *OpVal = const_cast<Value *>(CS.getArgument(ArgNo++));
- MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
@@ -798,6 +810,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// can fold it.
I->removeFromParent();
I->insertAfter(LI);
+ ++NumExtsMoved;
return true;
}
@@ -869,99 +882,93 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
// Replace a use of the {s|z}ext source with a use of the result.
TheUse = InsertedTrunc;
-
+ ++NumExtUses;
MadeChange = true;
}
return MadeChange;
}
-// In this pass we look for GEP and cast instructions that are used
-// across basic blocks and rewrite them to improve basic-block-at-a-time
-// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
- bool MadeChange = false;
-
- // Split all critical edges where the dest block has a PHI.
- if (CriticalEdgeSplit) {
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
- }
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
}
+ return false;
}
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than then one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
- // Keep track of non-local addresses that have been sunk into this block.
- // This allows us to avoid inserting duplicate code for blocks with multiple
- // load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
-
- for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
- Instruction *I = BBI++;
-
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // If the source of the cast is a constant, then this should have
- // already been constant folded. The only reason NOT to constant fold
- // it is if something (e.g. LSR) was careful to place the constant
- // evaluation in a block other than then one that uses it (e.g. to hoist
- // the address of globals out of a loop). If this is the case, we don't
- // want to forward-subst the cast.
- if (isa<Constant>(CI->getOperand(0)))
- continue;
-
- bool Change = false;
- if (TLI) {
- Change = OptimizeNoopCopyExpression(CI, *TLI);
- MadeChange |= Change;
- }
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
- if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
- MadeChange |= MoveExtToFormExtLoad(I);
- MadeChange |= OptimizeExtUses(I);
- }
- } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
- MadeChange |= OptimizeCmpExpression(CI);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
- SunkAddrs);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType(),
- SunkAddrs);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- if (GEPI->hasAllZeroIndices()) {
- /// The GEP operand must be a pointer, so must its result -> BitCast
- Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
- GEPI->replaceAllUsesWith(NC);
- GEPI->eraseFromParent();
- MadeChange = true;
- BBI = NC;
- }
- } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // If we found an inline asm expession, and if the target knows how to
- // lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
- if (TLI->ExpandInlineAsm(CI)) {
- BBI = BB.begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- } else
- // Sink address computing for memory operands into the block.
- MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
- } else {
- // Other CallInst optimizations that don't need to muck with the
- // enclosing iterator here.
- MadeChange |= OptimizeCallInst(CI);
- }
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
}
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
}
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ return false;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ CurInstIterator = BB.begin();
+ for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ MadeChange |= OptimizeInst(CurInstIterator++);
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 44e72e2..be12973 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -30,20 +31,20 @@ STATISTIC(NumCmps, "Number of comparisons propagated");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
LazyValueInfo *LVI;
-
+
bool processSelect(SelectInst *SI);
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
-
+
public:
static char ID;
CorrelatedValuePropagation(): FunctionPass(ID) {
initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
}
-
+
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
}
@@ -65,46 +66,51 @@ Pass *llvm::createCorrelatedValuePropagationPass() {
bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getOperand(0))) return false;
-
+
Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
if (!C) return false;
-
+
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return false;
-
- S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
S->eraseFromParent();
++NumSelects;
-
+
return true;
}
bool CorrelatedValuePropagation::processPHI(PHINode *P) {
bool Changed = false;
-
+
BasicBlock *BB = P->getParent();
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
-
+
Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
P->getIncomingBlock(i),
BB);
if (!C) continue;
-
+
P->setIncomingValue(i, C);
Changed = true;
}
-
- if (Value *ConstVal = P->hasConstantValue()) {
- P->replaceAllUsesWith(ConstVal);
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
}
-
+
++NumPhis;
-
+
return Changed;
}
@@ -114,12 +120,12 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
Pointer = L->getPointerOperand();
else
Pointer = cast<StoreInst>(I)->getPointerOperand();
-
+
if (isa<Constant>(Pointer)) return false;
-
+
Constant *C = LVI->getConstant(Pointer, I->getParent());
if (!C) return false;
-
+
++NumMemAccess;
I->replaceUsesOfWith(Pointer, C);
return true;
@@ -135,32 +141,32 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
if (isa<Instruction>(Op0) &&
cast<Instruction>(Op0)->getParent() == C->getParent())
return false;
-
+
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
-
+
pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
if (PI == PE) return false;
-
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Result == LazyValueInfo::Unknown) return false;
++PI;
while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Res != Result) return false;
++PI;
}
-
+
++NumCmps;
-
+
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
+
C->eraseFromParent();
return true;
@@ -168,9 +174,9 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
-
+
bool FnChanged = false;
-
+
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
@@ -192,9 +198,9 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
break;
}
}
-
+
FnChanged |= BBChanged;
}
-
+
return FnChanged;
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1ea0b15..867a06a 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -19,17 +19,20 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -37,50 +40,45 @@ STATISTIC(NumFastOther , "Number of other instrs removed");
namespace {
struct DSE : public FunctionPass {
- TargetData *TD;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID) {
+ DSE() : FunctionPass(ID), AA(0), MD(0) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &F) {
- bool Changed = false;
-
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(const CallInst *F,
- Instruction *Inst,
- MemDepResult Dep);
+ bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
- bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers);
- void DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *deadPointers = 0);
-
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects);
- // getAnalysisUsage - We require post dominance frontiers (aka Control
- // Dependence Graph)
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
-
- uint64_t getPointerSize(Value *V) const;
};
}
@@ -93,9 +91,56 @@ INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// doesClobberMemory - Does this instruction clobber (write without reading)
-/// some memory?
-static bool doesClobberMemory(Instruction *I) {
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -113,148 +158,296 @@ static bool doesClobberMemory(Instruction *I) {
return false;
}
-/// isElidable - If the value of this instruction and the memory it writes to is
-/// unused, may we delete this instrtction?
-static bool isElidable(Instruction *I) {
- assert(doesClobberMemory(I));
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return II->getIntrinsicID() != Intrinsic::lifetime_end;
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // memset/memcpy, which writes more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// is unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return !SI->isVolatile();
- return true;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
-/// getPointerOperand - Return the pointer that is being clobbered.
-static Value *getPointerOperand(Instruction *I) {
- assert(doesClobberMemory(I));
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getArgOperand(0);
+ return MI->getDest();
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return II->getArgOperand(0);
- case Intrinsic::lifetime_end:
- return II->getArgOperand(1);
}
}
-/// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns AliasAnalysis::UnknownSize.
-static uint64_t getStoreSize(Instruction *I, const TargetData *TD) {
- assert(doesClobberMemory(I));
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!TD) return AliasAnalysis::UnknownSize;
- return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+ const TargetData *TD = AA.getTargetData();
+ if (TD == 0)
+ return AliasAnalysis::UnknownSize;
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ return AliasAnalysis::UnknownSize;
}
+
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+}
- Value *Len;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- Len = MI->getLength();
- } else {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: assert(false && "Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return AliasAnalysis::UnknownSize;
- case Intrinsic::lifetime_end:
- Len = II->getArgOperand(0);
- break;
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isArrayAllocation();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->mayBeOverridden();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no TargetData information around, then the size of the store
+ // is inferrable from the pointee type. If they are the same type, then
+ // we know that the store is safe.
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
}
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
+ }
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const TargetData &TD = *AA.getTargetData();
+
+ const Value *UO1 = GetUnderlyingObject(P1, &TD),
+ *UO2 = GetUnderlyingObject(P2, &TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return false;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ if (isObjectPointerWithTrustworthySize(UO2)) {
+ uint64_t ObjectSize =
+ TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+ if (ObjectSize == Later.Size)
+ return true;
}
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
- if (!LenCI->isAllOnesValue())
- return LenCI->getZExtValue();
- return AliasAnalysis::UnknownSize;
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ int64_t Off1 = 0, Off2 = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return false;
+
+ // Otherwise, we might have a situation like:
+ // store i16 -> P + 1 Byte
+ // store i32 -> P
+ // In this case, we see if the later store completely overlaps all bytes
+ // stored by the previous store.
+ if (Off1 < Off2 || // Earlier starts before Later.
+ Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
+ return false;
+ // Otherwise, we have complete overlap.
+ return true;
}
-/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
-/// greater than or equal to the store in I2. This returns false if we don't
-/// know.
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
///
-static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
- const TargetData *TD) {
- const Type *I1Ty = getPointerOperand(I1)->getType();
- const Type *I2Ty = getPointerOperand(I2)->getType();
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
+///
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
- // Exactly the same type, must have exactly the same size.
- if (I1Ty == I2Ty) return true;
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
- uint64_t I1Size = getStoreSize(I1, TD);
- uint64_t I2Size = getStoreSize(I2, TD);
+ // Okay, 'Inst' may copy over itself. However, we can still remove a the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+ // aliases, so removing the first memcpy is safe (assuming it writes <= #
+ // bytes as the second one.
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
+
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
- return I1Size != AliasAnalysis::UnknownSize &&
- I2Size != AliasAnalysis::UnknownSize &&
- I1Size >= I2Size;
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
}
-bool DSE::runOnBasicBlock(BasicBlock &BB) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<TargetData>();
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
- // If we find a store or a free, get its memory dependence.
- if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
- continue;
-
- MemDepResult InstDep = MD.getDependency(Inst);
-
- // Ignore non-local stores.
- // FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal()) continue;
-
- // Handle frees whose dependencies are non-trivial.
- if (const CallInst *F = isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(F, Inst, InstDep);
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= HandleFree(F);
continue;
}
- // If not a definite must-alias dependency, ignore it.
- if (!InstDep.isDef())
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst))
continue;
-
- // If this is a store-store dependence, then the previous store is dead so
- // long as this store is at least as big as it.
- if (doesClobberMemory(InstDep.getInst())) {
- Instruction *DepStore = InstDep.getInst();
- if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
- isElidable(DepStore)) {
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepStore);
- ++NumFastStores;
- MadeChange = true;
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- continue;
- }
- }
+ MemDepResult InstDep = MD->getDependency(Inst);
- if (!isElidable(Inst))
+ // Ignore non-local store liveness.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal() ||
+ // Ignore self dependence, which happens in the entry block of the
+ // function.
+ InstDep.getInst() == Inst)
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
+ SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(SI, *MD);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -267,24 +460,63 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
- // If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
- // Delete the store and now-dead instructions that feed it.
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(Inst);
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (!InstDep.isNonLocal()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+ // away the store and we bail out. However, if we depend on on something
+ // that overwrites the memory location we *can* potentially optimize it.
+ //
+ // Find out what memory location the dependant instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, or if it isn't a size, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
- continue;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
}
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
@@ -296,28 +528,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
-/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
-/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
- Instruction *Inst,
- MemDepResult Dep) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ MemDepResult Dep = MD->getDependency(F);
do {
+ if (Dep.isNonLocal()) return false;
+
Instruction *Dependency = Dep.getInst();
- if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
return false;
- Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
- if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
- AliasAnalysis::MustAlias)
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
return false;
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency);
+ DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
// Inst's old Dependency is now deleted. Compute the next dependency,
@@ -325,8 +555,9 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
// s[0] = 0;
// s[1] = 0; // This has just been deleted.
// free(s);
- Dep = MD.getDependency(Inst);
+ Dep = MD->getDependency(F);
} while (!Dep.isNonLocal());
+
return true;
}
@@ -337,259 +568,163 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
bool MadeChange = false;
- // Pointers alloca'd in this function are dead in the end block
- SmallPtrSet<Value*, 64> deadPointers;
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallPtrSet<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
- // If we find a store whose pointer is dead.
- if (doesClobberMemory(BBI)) {
- if (isElidable(BBI)) {
- // See through pointer-to-pointer bitcasts
- Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
-
- // Alloca'd pointers or byval arguments (which are functionally like
- // alloca's) are valid candidates for removal.
- if (deadPointers.count(pointerOperand)) {
- // DCE instructions only used to calculate that store.
- Instruction *Dead = BBI;
- ++BBI;
- DeleteDeadInstruction(Dead, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
- continue;
- }
- }
-
- // Because a memcpy or memmove is also a load, we can't skip it if we
- // didn't remove it.
- if (!isa<MemTransferInst>(BBI))
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+ // Stores to stack values are valid candidates for removal.
+ if (DeadStackObjects.count(Pointer)) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Object: " << *Pointer << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
continue;
+ }
}
- Value *killPointer = 0;
- uint64_t killPointerSize = AliasAnalysis::UnknownSize;
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
- // If we encounter a use of the pointer, it is no longer considered dead
- if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
- // However, if this load is unused and not volatile, we can go ahead and
- // remove it, and not have to worry about it making our pointer undead!
- if (L->use_empty() && !L->isVolatile()) {
- ++BBI;
- DeleteDeadInstruction(L, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- continue;
- }
-
- killPointer = L->getPointerOperand();
- } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
- killPointer = V->getOperand(0);
- } else if (isa<MemTransferInst>(BBI) &&
- isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
- killPointer = cast<MemTransferInst>(BBI)->getSource();
- killPointerSize = cast<ConstantInt>(
- cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
- } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
- deadPointers.erase(A);
-
- // Dead alloca's can be DCE'd when we reach them
- if (A->use_empty()) {
- ++BBI;
- DeleteDeadInstruction(A, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- }
-
+ if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ DeadStackObjects.erase(A);
continue;
- } else if (CallSite CS = cast<Value>(BBI)) {
- // If this call does not access memory, it can't
- // be undeadifying any of our pointers.
- if (AA.doesNotAccessMemory(CS))
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
continue;
- unsigned modRef = 0;
- unsigned other = 0;
+ unsigned NumModRef = 0, NumOther = 0;
- // Remove any pointers made undead by the call from the dead set
- std::vector<Value*> dead;
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // HACK: if we detect that our AA is imprecise, it's not
- // worth it to scan the rest of the deadPointers set. Just
- // assume that the AA will return ModRef for everything, and
- // go ahead and bail.
- if (modRef >= 16 && other == 0) {
- deadPointers.clear();
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ SmallVector<Value*, 8> LiveAllocas;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // If we detect that our AA is imprecise, it's not worth it to scan the
+ // rest of the DeadPointers set. Just assume that the AA will return
+ // ModRef for everything, and go ahead and bail out.
+ if (NumModRef >= 16 && NumOther == 0)
return MadeChange;
- }
-
- // See if the call site touches it
- AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
- getPointerSize(*I));
+
+ // See if the call site touches it.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
if (A == AliasAnalysis::ModRef)
- ++modRef;
+ ++NumModRef;
else
- ++other;
+ ++NumOther;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- dead.push_back(*I);
+ LiveAllocas.push_back(*I);
}
-
- for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
- I != E; ++I)
- deadPointers.erase(*I);
- continue;
- } else if (isInstructionTriviallyDead(BBI)) {
- // For any non-memory-affecting non-terminators, DCE them as we reach them
- Instruction *Inst = BBI;
- ++BBI;
- DeleteDeadInstruction(Inst, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.erase(*I);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ return MadeChange;
+
continue;
}
- if (!killPointer)
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else {
+ // Not a loading instruction.
continue;
+ }
- killPointer = killPointer->getUnderlyingObject();
+ // Remove any allocas from the DeadPointer set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
- // Deal with undead pointers
- MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
- deadPointers);
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
}
return MadeChange;
}
-/// RemoveUndeadPointers - check for uses of a pointer that make it
-/// undead when scanning for dead stores to alloca's.
-bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- // If the kill pointer can be easily reduced to an alloca,
- // don't bother doing extraneous AA queries.
- if (deadPointers.count(killPointer)) {
- deadPointers.erase(killPointer);
- return false;
- }
-
- // A global can't be in the dead pointer set.
- if (isa<GlobalValue>(killPointer))
- return false;
-
- bool MadeChange = false;
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
- SmallVector<Value*, 16> undead;
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // See if this pointer could alias it
- AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
- killPointer, killPointerSize);
-
- // If it must-alias and a store, we can delete it
- if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
- StoreInst *S = cast<StoreInst>(BBI);
-
- // Remove it!
- ++BBI;
- DeleteDeadInstruction(S, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
-
- continue;
-
- // Otherwise, it is undead
- } else if (A != AliasAnalysis::NoAlias)
- undead.push_back(*I);
+ SmallVector<Value*, 16> NowLive;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+ if (!AA->isNoAlias(StackLoc, LoadedLoc))
+ NowLive.push_back(*I);
}
- for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+ for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
I != E; ++I)
- deadPointers.erase(*I);
-
- return MadeChange;
+ DeadStackObjects.erase(*I);
}
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
-/// and zero out all the operands of this instruction. If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
-/// If ValueSet is non-null, remove any deleted instructions from it as well.
-///
-void DSE::DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *ValueSet) {
- SmallVector<Instruction*, 32> NowDeadInsts;
-
- NowDeadInsts.push_back(I);
- --NumFastOther;
-
- // Before we touch this instruction, remove it from memdep!
- MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
- do {
- Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
- ++NumFastOther;
-
- // This instruction is dead, zap it, in stages. Start by removing it from
- // MemDep, which needs to know the operands and needs it to be in the
- // function.
- MDA.removeInstruction(DeadInst);
-
- for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
- Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
-
- // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue;
-
- if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
- NowDeadInsts.push_back(OpI);
- }
-
- DeadInst->eraseFromParent();
-
- if (ValueSet) ValueSet->erase(DeadInst);
- } while (!NowDeadInsts.empty());
-}
-
-uint64_t DSE::getPointerSize(Value *V) const {
- if (TD) {
- if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
- // Get size information for the alloca
- if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
- return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- } else {
- assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- const PointerType *PT = cast<PointerType>(V->getType());
- return TD->getTypeAllocSize(PT->getElementType());
- }
- }
- return AliasAnalysis::UnknownSize;
-}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 0000000..3d3f17b
--- /dev/null
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ Res ^= getHash(Inst->getOperand(i)) << i;
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ Res ^= getHash(CI->getType());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+ Res ^= CI->getPredicate();
+ else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+ for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+ E = EVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+ for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+ E = IVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else {
+ // nothing extra to hash in.
+ assert((isa<CallInst>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << i;
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const TargetData *TD;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this: if so, we replace them with what
+ /// we find, otherwise we insert them so that dominated values can succeed in
+ /// their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the current values
+ /// of read-only call values. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ bool processNode(DomTreeNode *Node);
+
+ // This transformation requires dominator postdominator info
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ // Define a scope in the scoped hash table. When we are done processing this
+ // domtree node and recurse back up to our parent domtree node, this will pop
+ // off all the values we install.
+ ScopedHTType::ScopeTy Scope(*AvailableValues);
+
+ // Define a scope for the load values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+
+ // Define a scope for the call values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ CallHTType::ScopeTy CallScope(*AvailableCalls);
+
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent value. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+ /// as long as there in no instruction that reads memory. If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (LI->isVolatile()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (!SI->isVolatile())
+ LastStore = SI;
+ }
+ }
+ }
+
+ unsigned LiveOutGeneration = CurrentGeneration;
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+ Changed |= processNode(*I);
+ // Pop any generation changes off the stack from the recursive walk.
+ CurrentGeneration = LiveOutGeneration;
+ }
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ return processNode(DT->getRootNode());
+}
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
deleted file mode 100644
index 4c3d188..0000000
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This function breaks GEPs with more than 2 non-zero operands into smaller
-// GEPs each with no more than 2 non-zero operands. This exposes redundancy
-// between GEPs with common initial operand sequences.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "split-geps"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- class GEPSplitter : public FunctionPass {
- virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(ID) {
- initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char GEPSplitter::ID = 0;
-INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false)
-
-FunctionPass *llvm::createGEPSplitterPass() {
- return new GEPSplitter();
-}
-
-bool GEPSplitter::runOnFunction(Function &F) {
- bool Changed = false;
-
- // Visit each GEP instruction.
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
- unsigned NumOps = GEP->getNumOperands();
- // Ignore GEPs which are already simple.
- if (NumOps <= 2)
- continue;
- bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
- cast<ConstantInt>(GEP->getOperand(1))->isZero();
- if (NumOps == 3 && FirstIndexIsZero)
- continue;
- // The first index is special and gets expanded with a 2-operand GEP
- // (unless it's zero, in which case we can skip this).
- Value *NewGEP = FirstIndexIsZero ?
- GEP->getOperand(0) :
- GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
- "tmp", GEP);
- // All remaining indices get expanded with a 3-operand GEP with zero
- // as the second operand.
- Value *Idxs[2];
- Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
- for (unsigned i = 2; i != NumOps; ++i) {
- Idxs[1] = GEP->getOperand(i);
- NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
- }
- GEP->replaceAllUsesWith(NewGEP);
- GEP->eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
-
-void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 732f6c8..a0123f5 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,21 +17,9 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
@@ -40,17 +28,19 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -72,77 +62,24 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
/// two values.
namespace {
struct Expression {
- enum ExpressionOpcode {
- ADD = Instruction::Add,
- FADD = Instruction::FAdd,
- SUB = Instruction::Sub,
- FSUB = Instruction::FSub,
- MUL = Instruction::Mul,
- FMUL = Instruction::FMul,
- UDIV = Instruction::UDiv,
- SDIV = Instruction::SDiv,
- FDIV = Instruction::FDiv,
- UREM = Instruction::URem,
- SREM = Instruction::SRem,
- FREM = Instruction::FRem,
- SHL = Instruction::Shl,
- LSHR = Instruction::LShr,
- ASHR = Instruction::AShr,
- AND = Instruction::And,
- OR = Instruction::Or,
- XOR = Instruction::Xor,
- TRUNC = Instruction::Trunc,
- ZEXT = Instruction::ZExt,
- SEXT = Instruction::SExt,
- FPTOUI = Instruction::FPToUI,
- FPTOSI = Instruction::FPToSI,
- UITOFP = Instruction::UIToFP,
- SITOFP = Instruction::SIToFP,
- FPTRUNC = Instruction::FPTrunc,
- FPEXT = Instruction::FPExt,
- PTRTOINT = Instruction::PtrToInt,
- INTTOPTR = Instruction::IntToPtr,
- BITCAST = Instruction::BitCast,
- ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
- ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
- FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
- FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
- FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
- SHUFFLE, SELECT, GEP, CALL, CONSTANT,
- INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
-
- ExpressionOpcode opcode;
+ uint32_t opcode;
const Type* type;
SmallVector<uint32_t, 4> varargs;
- Value *function;
Expression() { }
- Expression(ExpressionOpcode o) : opcode(o) { }
+ Expression(uint32_t o) : opcode(o) { }
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
- else if (opcode == EMPTY || opcode == TOMBSTONE)
+ else if (opcode == ~0U || opcode == ~1U)
return true;
else if (type != other.type)
return false;
- else if (function != other.function)
+ else if (varargs != other.varargs)
return false;
- else {
- if (varargs.size() != other.varargs.size())
- return false;
-
- for (size_t i = 0; i < varargs.size(); ++i)
- if (varargs[i] != other.varargs[i])
- return false;
-
- return true;
- }
+ return true;
}
-
- /*bool operator!=(const Expression &other) const {
- return !(*this == other);
- }*/
};
class ValueTable {
@@ -155,19 +92,7 @@ namespace {
uint32_t nextValueNumber;
- Expression::ExpressionOpcode getOpcode(CmpInst* C);
- Expression create_expression(BinaryOperator* BO);
- Expression create_expression(CmpInst* C);
- Expression create_expression(ShuffleVectorInst* V);
- Expression create_expression(ExtractElementInst* C);
- Expression create_expression(InsertElementInst* V);
- Expression create_expression(SelectInst* V);
- Expression create_expression(CastInst* C);
- Expression create_expression(GetElementPtrInst* G);
- Expression create_expression(CallInst* C);
- Expression create_expression(ExtractValueInst* C);
- Expression create_expression(InsertValueInst* C);
-
+ Expression create_expression(Instruction* I);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -188,11 +113,11 @@ namespace {
namespace llvm {
template <> struct DenseMapInfo<Expression> {
static inline Expression getEmptyKey() {
- return Expression(Expression::EMPTY);
+ return ~0U;
}
static inline Expression getTombstoneKey() {
- return Expression(Expression::TOMBSTONE);
+ return ~1U;
}
static unsigned getHashValue(const Expression e) {
@@ -204,20 +129,13 @@ template <> struct DenseMapInfo<Expression> {
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
-
- hash = ((unsigned)((uintptr_t)e.function >> 4) ^
- (unsigned)((uintptr_t)e.function >> 9)) +
- hash * 37;
-
+
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
};
-
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
}
@@ -225,185 +143,27 @@ struct isPodLike<Expression> { static const bool value = true; };
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C)) {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
- case ICmpInst::ICMP_NE: return Expression::ICMPNE;
- case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
- case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
- case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
- case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
- case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
- case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
- case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
- case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
- }
- } else {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
- case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
- case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
- case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
- case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
- case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
- case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
- case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
- case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
- case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
- case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
- case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
- case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
- case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
- }
- }
-}
-
-Expression ValueTable::create_expression(CallInst* C) {
- Expression e;
-
- e.type = C->getType();
- e.function = C->getCalledFunction();
- e.opcode = Expression::CALL;
-
- CallSite CS(C);
- for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(BinaryOperator* BO) {
- Expression e;
- e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
- e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
- e.function = 0;
- e.type = BO->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
-
- return e;
-}
-
-Expression ValueTable::create_expression(CmpInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.varargs.push_back(lookup_or_add(C->getOperand(1)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = getOpcode(C);
-
- return e;
-}
-
-Expression ValueTable::create_expression(CastInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
- return e;
-}
-
-Expression ValueTable::create_expression(ShuffleVectorInst* S) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(S->getOperand(0)));
- e.varargs.push_back(lookup_or_add(S->getOperand(1)));
- e.varargs.push_back(lookup_or_add(S->getOperand(2)));
- e.function = 0;
- e.type = S->getType();
- e.opcode = Expression::SHUFFLE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractElementInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getOperand(0)));
- e.varargs.push_back(lookup_or_add(E->getOperand(1)));
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertElementInst* I) {
+Expression ValueTable::create_expression(Instruction *I) {
Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getOperand(0)));
- e.varargs.push_back(lookup_or_add(I->getOperand(1)));
- e.varargs.push_back(lookup_or_add(I->getOperand(2)));
- e.function = 0;
e.type = I->getType();
- e.opcode = Expression::INSERT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(SelectInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getCondition()));
- e.varargs.push_back(lookup_or_add(I->getTrueValue()));
- e.varargs.push_back(lookup_or_add(I->getFalseValue()));
- e.function = 0;
- e.type = I->getType();
- e.opcode = Expression::SELECT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(GetElementPtrInst* G) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
- e.function = 0;
- e.type = G->getType();
- e.opcode = Expression::GEP;
-
- for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACTVALUE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
- for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::INSERTVALUE;
-
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
return e;
}
@@ -562,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::And:
case Instruction::Or :
case Instruction::Xor:
- exp = create_expression(cast<BinaryOperator>(I));
- break;
case Instruction::ICmp:
case Instruction::FCmp:
- exp = create_expression(cast<CmpInst>(I));
- break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -580,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
- exp = create_expression(cast<CastInst>(I));
- break;
case Instruction::Select:
- exp = create_expression(cast<SelectInst>(I));
- break;
case Instruction::ExtractElement:
- exp = create_expression(cast<ExtractElementInst>(I));
- break;
case Instruction::InsertElement:
- exp = create_expression(cast<InsertElementInst>(I));
- break;
case Instruction::ShuffleVector:
- exp = create_expression(cast<ShuffleVectorInst>(I));
- break;
case Instruction::ExtractValue:
- exp = create_expression(cast<ExtractValueInst>(I));
- break;
case Instruction::InsertValue:
- exp = create_expression(cast<InsertValueInst>(I));
- break;
case Instruction::GetElementPtr:
- exp = create_expression(cast<GetElementPtrInst>(I));
+ exp = create_expression(I);
break;
default:
valueNumbering[V] = nextValueNumber;
@@ -648,15 +390,6 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
- struct ValueNumberScope {
- ValueNumberScope* parent;
- DenseMap<uint32_t, Value*> table;
-
- ValueNumberScope(ValueNumberScope* p) : parent(p) { }
- };
-}
-
-namespace {
class GVN : public FunctionPass {
bool runOnFunction(Function &F);
@@ -674,7 +407,59 @@ namespace {
const TargetData* TD;
ValueTable VN;
- DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
+ /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
+ /// its value number.
+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry& Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given value if encountered.
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != V || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
// List of critical edges to be split between iterations.
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
@@ -701,9 +486,8 @@ namespace {
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*>& d);
bool iterateOnFunction(Function &F);
- Value *CollapsePhi(PHINode* p);
bool performPRE(Function& F);
- Value *lookupNumber(BasicBlock *BB, uint32_t num);
+ Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
@@ -733,33 +517,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "}\n";
}
-static bool isSafeReplacement(PHINode* p, Instruction *inst) {
- if (!isa<PHINode>(inst))
- return true;
-
- for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
- UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
- if (use_phi->getParent() == inst->getParent())
- return false;
-
- return true;
-}
-
-Value *GVN::CollapsePhi(PHINode *PN) {
- Value *ConstVal = PN->hasConstantValue(DT);
- if (!ConstVal) return 0;
-
- Instruction *Inst = dyn_cast<Instruction>(ConstVal);
- if (!Inst)
- return ConstVal;
-
- if (DT->dominates(Inst, PN))
- if (isSafeReplacement(PN, Inst))
- return Inst;
- return 0;
-}
-
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
@@ -943,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
-/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
-/// be expressed as a base pointer plus a constant offset. Return the base and
-/// offset to the caller.
-static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const TargetData &TD) {
- Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
-
- // Just look through bitcasts.
- if (PtrOp->getOpcode() == Instruction::BitCast)
- return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
- // If this is a GEP with constant indices, we can look through it.
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
- if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
- ++I, ++GTI) {
- ConstantInt *OpC = cast<ConstantInt>(*I);
- if (OpC->isZero()) continue;
-
- // Handle a struct and array indices which add their offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += OpC->getSExtValue()*Size;
- }
- }
-
- // Re-sign extend from the pointer size if needed to get overflow edge cases
- // right.
- unsigned PtrSize = TD.getPointerSizeInBits();
- if (PtrSize < 64)
- Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
-
- return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
-}
-
-
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
@@ -1002,9 +718,8 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
- Value *LoadBase =
- GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
@@ -1026,8 +741,6 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
- // remove this check, as it is duplicated with what we have below.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -1105,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1337,6 +1050,15 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
+ AA->addEscapingUse(P->getOperandUse(2*ii));
+ }
return V;
}
@@ -1643,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
- // It is valid to have the getelementptr before the test, even if p can be 0,
- // as getelementptr only does address arithmetic.
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
@@ -1661,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
if (!CanDoPRE) {
- while (!NewInsts.empty())
- NewInsts.pop_back_val()->eraseFromParent();
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
return false;
}
@@ -1688,9 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *UnavailablePred = I->first;
Value *LoadPtr = I->second;
- Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
- LI->getAlignment(),
- UnavailablePred->getTerminator());
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
@@ -1874,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
return false;
}
-Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
- DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
- if (I == localAvail.end())
- return 0;
-
- ValueNumberScope *Locals = I->second;
- while (Locals) {
- DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
- if (I != Locals->table.end())
- return I->second;
- Locals = Locals->parent;
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
}
- return 0;
+ return Val;
}
@@ -1917,80 +1658,74 @@ bool GVN::processInstruction(Instruction *I,
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+ addToLeaderTable(Num, LI, LI->getParent());
}
return Changed;
}
- uint32_t NextNum = VN.getNextUnusedValueNumber();
- unsigned Num = VN.lookup_or_add(I);
-
+ // For conditions branches, we can perform simple conditional propagation on
+ // the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
+
if (TrueSucc->getSinglePredecessor())
- localAvail[TrueSucc]->table[CondVN] =
- ConstantInt::getTrue(TrueSucc->getContext());
+ addToLeaderTable(CondVN,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
if (FalseSucc->getSinglePredecessor())
- localAvail[FalseSucc]->table[CondVN] =
- ConstantInt::getFalse(TrueSucc->getContext());
-
+ addToLeaderTable(CondVN,
+ ConstantInt::getFalse(TrueSucc->getContext()),
+ FalseSucc);
+
return false;
+ }
+
+ // Instructions with void type don't return a value, so there's
+ // no point in trying to find redudancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
return false;
}
- // Collapse PHI nodes
- if (PHINode* p = dyn_cast<PHINode>(I)) {
- Value *constVal = CollapsePhi(p);
-
- if (constVal) {
- p->replaceAllUsesWith(constVal);
- if (MD && constVal->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(constVal);
- VN.erase(p);
-
- toErase.push_back(p);
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
- }
-
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- } else if (Num == NextNum) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
+ if (Num == NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
- // Remove it!
- VN.erase(I);
- I->replaceAllUsesWith(repl);
- if (MD && repl->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
- return true;
-
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
}
-
- return false;
+
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
}
/// runOnFunction - This is the main transformation entry point for a function.
@@ -2009,8 +1744,8 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI;
- ++FI;
+ BasicBlock *BB = FI++;
+
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
@@ -2018,7 +1753,6 @@ bool GVN::runOnFunction(Function& F) {
}
unsigned Iteration = 0;
-
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
@@ -2136,20 +1870,19 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!localAvail.count(P)) {
+ } else if (!DT->dominates(&F.getEntryBlock(), P)) {
NumWithout = 2;
break;
}
- DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[P]->table.find(ValNo);
- if (predV == localAvail[P]->table.end()) {
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
PREPred = P;
++NumWithout;
- } else if (predV->second == CurInst) {
+ } else if (predV == CurInst) {
NumWithout = 2;
} else {
- predMap[P] = predV->second;
+ predMap[P] = predV;
++NumWith;
}
}
@@ -2184,7 +1917,7 @@ bool GVN::performPRE(Function &F) {
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
- if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
PREInstr->setOperand(i, V);
} else {
success = false;
@@ -2208,7 +1941,7 @@ bool GVN::performPRE(Function &F) {
++NumGVNPRE;
// Update the availability map to include the new instruction.
- localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+ addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
PHINode* Phi = PHINode::Create(CurInst->getType(),
@@ -2221,12 +1954,21 @@ bool GVN::performPRE(Function &F) {
}
VN.add(Phi, ValNo);
- localAvail[CurrentBlock]->table[ValNo] = Phi;
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
CurInst->replaceAllUsesWith(Phi);
- if (MD && Phi->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(Phi);
+ if (Phi->getType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
@@ -2258,16 +2000,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
-
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
- if (DI->getIDom())
- localAvail[DI->getBlock()] =
- new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
- else
- localAvail[DI->getBlock()] = new ValueNumberScope(0);
- }
-
+
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@@ -2287,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
-
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
- delete I->second;
- localAvail.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
@@ -2301,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
- const ValueNumberScope *VNS = I->second;
-
- while (VNS) {
- for (DenseMap<uint32_t, Value*>::const_iterator
- II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
- assert(II->second != Inst && "Inst still in value numbering scope!");
- }
-
- VNS = VNS->parent;
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
}
}
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0fad048..0fb6798 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -200,7 +200,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
}
// Expand the code for the iteration count.
- assert(RHS->isLoopInvariant(L) &&
+ assert(SE->isLoopInvariant(RHS, L) &&
"Computed iteration count is not loop invariant!");
Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
@@ -243,8 +243,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L,
- SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Verify the input to the pass in already in LCSSA form.
assert(L->isLCSSAForm(*DT));
@@ -302,7 +301,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!ExitValue->isLoopInvariant(L))
+ if (!SE->isLoopInvariant(ExitValue, L))
continue;
Changed = true;
@@ -348,7 +347,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
//
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Header = L->getHeader();
SmallVector<WeakVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
@@ -617,9 +616,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L) {
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
- if (S->isLoopInvariant(L)) return true;
+ if (SE->isLoopInvariant(S, L)) return true;
// Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
// to transform them into efficient code.
@@ -630,18 +629,18 @@ static bool isSafe(const SCEV *S, const Loop *L) {
if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L)) return false;
+ if (!isSafe(*I, L, SE)) return false;
return true;
}
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L);
+ return isSafe(C->getOperand(), L, SE);
// A udiv is safe if its operands are.
if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L) &&
- isSafe(UD->getRHS(), L);
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
// SCEVUnknown is always safe.
if (isa<SCEVUnknown>(S))
@@ -672,7 +671,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// Evaluate the expression out of the loop, if possible.
if (!L->contains(UI->getUser())) {
const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (ExitVal->isLoopInvariant(L))
+ if (SE->isLoopInvariant(ExitVal, L))
AR = ExitVal;
}
@@ -682,7 +681,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
- if (!isSafe(AR, L))
+ if (!isSafe(AR, L, SE))
continue;
// Determine the insertion point for this user. By default, insert
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 8f12ee0..90094a8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -40,11 +40,22 @@ STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
static cl::opt<unsigned>
-Threshold("jump-threading-threshold",
+Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
@@ -70,16 +81,16 @@ namespace {
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
-
+
// RAII helper for updating the recursion stack.
struct RecursionSetRemover {
DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
std::pair<Value*, BasicBlock*> ThePair;
-
+
RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
std::pair<Value*, BasicBlock*> P)
: TheSet(S), ThePair(P) { }
-
+
~RecursionSetRemover() {
TheSet.erase(ThePair);
}
@@ -91,33 +102,28 @@ namespace {
}
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
}
-
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs);
-
- typedef SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > PredValueInfo;
-
+
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
- PredValueInfo &Result);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
-
-
- bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
-
+
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
}
@@ -138,20 +144,20 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
LVI = &getAnalysis<LazyValueInfo>();
-
+
FindLoopHeaders(F);
-
+
bool Changed, EverChanged = false;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
- // Thread all of the branches we can over this block.
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
-
+
++I;
-
+
// If the block is trivially dead, zap it. This eliminates the successor
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
@@ -162,45 +168,43 @@ bool JumpThreading::runOnFunction(Function &F) {
LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // Can't thread an unconditional jump, but if the block is "almost
- // empty", we can replace uses of it with uses of the successor and make
- // this dead.
- if (BI->isUnconditional() &&
- BB != &BB->getParent()->getEntryBlock()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- if (BBI->isTerminator()) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // FIXME: It is always conservatively correct to drop the info
- // for a block even if it doesn't get erased. This isn't totally
- // awesome, but it allows us to use AssertingVH to prevent nasty
- // dangling pointer issues within LazyValueInfo.
- LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
- Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
- }
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
}
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
} while (Changed);
-
+
LoopHeaders.clear();
return EverChanged;
}
@@ -210,25 +214,25 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
+
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
-
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
-
+
// If this is a pointer->pointer bitcast, it is free.
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
-
+
// All other instructions count for at least one unit.
++Size;
-
+
// Calls are more expensive. If they are non-intrinsic calls, we model them
// as having cost of 4. If they are a non-vector intrinsic, we model them
// as having cost of 2 total, and if they are a vector intrinsic, we model
@@ -240,12 +244,16 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
Size += 1;
}
}
-
+
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to happen.
if (isa<SwitchInst>(I))
Size = Size > 6 ? Size-6 : 0;
-
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
return Size;
}
@@ -267,57 +275,64 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
-
+
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-// Helper method for ComputeValueKnownInPredecessors. If Value is a
-// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
-static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > &Result,
- Constant *Value, BasicBlock* BB){
- if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
- Result.push_back(std::make_pair(FoldedCInt, BB));
- else if (isa<UndefValue>(Value))
- Result.push_back(std::make_pair((ConstantInt*)0, BB));
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
}
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
-/// if we can infer that the value is a known ConstantInt in any of our
-/// predecessors. If so, return the known list of value and pred BB in the
-/// result vector. If a value is known to be undef, it is returned as null.
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
///
/// This returns true if there were any known values.
///
bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
// and terminate the search if we loop back to them
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
-
+
// An RAII help to remove this pair from the recursion set once the recursion
// stack pops back out again.
RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
- // If V is a constantint, then it is known in all predecessors.
- if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(V);
-
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CI, *PI));
-
+ Result.push_back(std::make_pair(KC, *PI));
+
return true;
}
-
+
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0 || I->getParent() != BB) {
-
+
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
//
@@ -330,73 +345,73 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// able to handle value inequalities better, for example if the compare is
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
-
+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
- if (PredCst == 0 ||
- (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
- continue;
-
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
}
-
+
return !Result.empty();
}
-
+
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
- if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
- Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
} else {
Constant *CI = LVI->getConstantOnEdge(InVal,
PN->getIncomingBlock(i), BB);
- // LVI returns null is no value could be determined.
- if (!CI) continue;
- PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
}
}
-
+
return !Result.empty();
}
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ PredValueInfoTy LHSVals, RHSVals;
// Handle some boolean conditions.
- if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
// X | true -> true
// X & false -> false
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
-
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
if (LHSVals.empty() && RHSVals.empty())
return false;
-
+
ConstantInt *InterestingVal;
if (I->getOpcode() == Instruction::Or)
InterestingVal = ConstantInt::getTrue(I->getContext());
else
InterestingVal = ConstantInt::getFalse(I->getContext());
-
+
SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
-
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
if (!LHSKnownBBs.count(RHSVals[i].second)) {
@@ -404,48 +419,51 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Result.back().first = InterestingVal;
}
}
-
+
return !Result.empty();
}
-
+
// Handle the NOT form of XOR.
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
if (Result.empty())
return false;
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
- if (Result[i].first)
- Result[i].first =
- cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
-
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
return true;
}
-
+
// Try to simplify some other binary operator values.
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
// Try to use constant folding to simplify the binary operator.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
Constant *V = LHSVals[i].first;
- if (V == 0) V = UndefValue::get(BO->getType());
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
-
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
}
-
+
return !Result.empty();
}
-
+
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
// We can do this simplification if any comparisons fold to true or false.
@@ -454,28 +472,28 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
-
+
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
if (Res == 0) {
if (!isa<Constant>(RHS))
continue;
-
- LazyValueInfo::Tristate
+
+ LazyValueInfo::Tristate
ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
cast<Constant>(RHS), PredBB, BB);
if (ResT == LazyValueInfo::Unknown)
continue;
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
-
- if (Constant *ConstRes = dyn_cast<Constant>(Res))
- PushConstantIntOrUndef(Result, ConstRes, PredBB);
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
}
-
+
return !Result.empty();
}
-
-
+
+
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
@@ -494,39 +512,73 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
continue;
Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ Result.push_back(std::make_pair(ResC, P));
}
return !Result.empty();
}
-
+
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
Constant *V = LHSVals[i].first;
- if (V == 0) V = UndefValue::get(CmpConst->getType());
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
-
+
return !Result.empty();
}
}
}
-
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
+ }
+ }
+
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB);
- ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
- if (CInt) {
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CInt, *PI));
+ Result.push_back(std::make_pair(KC, *PI));
}
-
+
return !Result.empty();
}
@@ -550,10 +602,20 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
if (NumPreds < MinNumPreds)
MinSucc = i;
}
-
+
return MinSucc;
}
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
@@ -562,83 +624,91 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock())
return false;
-
+
// If this block has a single predecessor, and if that pred has a single
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
- SinglePred != BB) {
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
-
+
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
-
+
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
}
- // Look to see if the terminator is a branch of switch, if not we can't thread
- // it.
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
Value *Condition;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
// Can't thread an unconditional jump.
if (BI->isUnconditional()) return false;
Condition = BI->getCondition();
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
- else
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
return false; // Must be an invoke.
-
- // If the terminator of this block is branching on a constant, simplify the
- // terminator to an unconditional branch. This can occur due to threading in
- // other blocks.
- if (isa<ConstantInt>(Condition)) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding terminator: " << *BB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(BB);
- return true;
}
-
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
-
+
// Fold the branch/switch.
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
BBTerm->getSuccessor(i)->removePredecessor(BB, true);
}
-
+
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
return true;
}
-
+
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB);
+ return true;
+ }
+
Instruction *CondInst = dyn_cast<Instruction>(Condition);
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0) {
// FIXME: Unify this with code below.
- if (ProcessThreadableEdges(Condition, BB))
+ if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
return false;
- }
-
-
+ }
+
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
// For a comparison where the LHS is outside this block, it's possible
// that we've branched on it before. Used LVI to see if we can simplify
@@ -653,7 +723,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// on that edge. If they're all true or all false, we can simplify the
// branch.
// FIXME: We could handle mixed true/false by duplicating code.
- LazyValueInfo::Tristate Baseline =
+ LazyValueInfo::Tristate Baseline =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, *PI, BB);
if (Baseline != LazyValueInfo::Unknown) {
@@ -664,7 +734,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondCmp->getOperand(0), CondConst, *PI, BB);
if (Ret != Baseline) break;
}
-
+
// If we terminated early, then one of the values didn't match.
if (PI == PE) {
unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
@@ -687,174 +757,37 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
-
+
// TODO: There are other places where load PRE would be profitable, such as
// more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
-
-
+
+
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- if (ProcessThreadableEdges(CondInst, BB))
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
return true;
-
+
// If this is an otherwise-unfoldable branch on a phi node in the current
// block, see if we can simplify.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN);
-
-
+
+
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
-
-
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
-
- return false;
-}
-
-/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
-/// block that jump on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// br COND, DESTBB, BBY
-/// DESTBB:
-/// br COND, BBZ, BBW
-///
-/// If DESTBB has multiple predecessors, we can't just constant fold the branch
-/// in DESTBB, we have to thread over it.
-bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *BB) {
- BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
-
- // If both successors of PredBB go to DESTBB, we don't know anything. We can
- // fold the branch to an unconditional one, which allows other recursive
- // simplifications.
- bool BranchDir;
- if (PredBI->getSuccessor(1) != BB)
- BranchDir = true;
- else if (PredBI->getSuccessor(0) != BB)
- BranchDir = false;
- else {
- DEBUG(dbgs() << " In block '" << PredBB->getName()
- << "' folding terminator: " << *PredBB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(PredBB);
- return true;
- }
-
- BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
- // If the dest block has one predecessor, just fix the branch condition to a
- // constant and fold it.
- if (BB->getSinglePredecessor()) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding condition to '" << BranchDir << "': "
- << *BB->getTerminator() << '\n');
- ++NumFolds;
- Value *OldCond = DestBI->getCondition();
- DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- BranchDir));
- // Delete dead instructions before we fold the branch. Folding the branch
- // can eliminate edges from the CFG which can end up deleting OldCond.
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- ConstantFoldTerminator(BB);
- return true;
- }
-
-
- // Next, figure out which successor we are threading to.
- BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
-
- SmallVector<BasicBlock*, 2> Preds;
- Preds.push_back(PredBB);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, Preds, SuccBB);
-}
-
-/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
-/// block that switch on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// switch COND [... DESTBB, BBY ... ]
-/// DESTBB:
-/// switch COND [... BBZ, BBW ]
-///
-/// Optimizing switches like this is very important, because simplifycfg builds
-/// switches out of repeated 'if' conditions.
-bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *DestBB) {
- // Can't thread edge to self.
- if (PredBB == DestBB)
- return false;
-
- SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
- SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
- // There are a variety of optimizations that we can potentially do on these
- // blocks: we order them from most to least preferable.
-
- // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
- // directly to their destination. This does not introduce *any* code size
- // growth. Skip debug info first.
- BasicBlock::iterator BBI = DestBB->begin();
- while (isa<DbgInfoIntrinsic>(BBI))
- BBI++;
-
- // FIXME: Thread if it just contains a PHI.
- if (isa<SwitchInst>(BBI)) {
- bool MadeChange = false;
- // Ignore the default edge for now.
- for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
- ConstantInt *DestVal = DestSI->getCaseValue(i);
- BasicBlock *DestSucc = DestSI->getSuccessor(i);
-
- // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
- // PredSI has an explicit case for it. If so, forward. If it is covered
- // by the default case, we can't update PredSI.
- unsigned PredCase = PredSI->findCaseValue(DestVal);
- if (PredCase == 0) continue;
-
- // If PredSI doesn't go to DestBB on this value, then it won't reach the
- // case on this condition.
- if (PredSI->getSuccessor(PredCase) != DestBB &&
- DestSI->getSuccessor(i) != DestBB)
- continue;
-
- // Do not forward this if it already goes to this destination, this would
- // be an infinite loop.
- if (PredSI->getSuccessor(PredCase) == DestSucc)
- continue;
-
- // Otherwise, we're safe to make the change. Make sure that the edge from
- // DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(dbgs() << "THROUGH: " << *DestSI);
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
- // If the destination has PHI nodes, just split the edge for updating
- // simplicity.
- if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
- SplitCriticalEdge(DestSI, i, this);
- DestSucc = DestSI->getSuccessor(i);
- }
- FoldSingleEntryPHINodes(DestSucc);
- PredSI->setSuccessor(PredCase, DestSucc);
- MadeChange = true;
- }
-
- if (MadeChange)
- return true;
- }
-
return false;
}
@@ -866,13 +799,13 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Don't hack volatile loads.
if (LI->isVolatile()) return false;
-
+
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
BasicBlock *LoadBB = LI->getParent();
if (LoadBB->getSinglePredecessor())
return false;
-
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
@@ -880,17 +813,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
-
+
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal =
+ if (Value *AvailableVal =
FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value if the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
-
+
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -904,13 +837,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// might clobber its value.
if (BBIt != LoadBB->begin())
return false;
-
-
+
+
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = 0;
-
+
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
@@ -928,23 +861,23 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
OneUnavailablePred = PredBB;
continue;
}
-
+
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
}
-
+
// If the loaded value isn't available in any predecessor, it isn't partially
// redundant.
if (AvailablePreds.empty()) return false;
-
+
// Okay, the loaded value is available in at least one (and maybe all!)
// predecessors. If the value is unavailable in more than one unique
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
BasicBlock *UnavailablePred = 0;
-
+
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
// unconditional branch, we know that it isn't a critical edge.
@@ -967,17 +900,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
-
+
if (!AvailablePredSet.count(P))
PredsToSplit.push_back(P);
}
-
+
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
"thread-pre-split", this);
}
-
+
// If the value isn't available in all predecessors, then there will be
// exactly one where it isn't available. Insert a load on that edge and add
// it to the AvailablePreds list.
@@ -989,35 +922,35 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
-
+
// Now we know that each predecessor of this block has a value in
// AvailablePreds, sort them for efficient access as we're walking the preds.
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
-
+
// Create a PHI node at the start of the block for the PRE'd load value.
PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
PN->takeName(LI);
-
+
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
BasicBlock *P = *PI;
- AvailablePredsTy::iterator I =
+ AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
std::make_pair(P, (Value*)0));
-
+
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
-
+
PN->addIncoming(I->second, I->first);
}
-
+
//cerr << "PRE: " << *LI << *PN << "\n";
-
+
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
-
+
return true;
}
@@ -1029,7 +962,7 @@ FindMostPopularDest(BasicBlock *BB,
const SmallVectorImpl<std::pair<BasicBlock*,
BasicBlock*> > &PredToDestList) {
assert(!PredToDestList.empty());
-
+
// Determine popularity. If there are multiple possible destinations, we
// explicitly choose to ignore 'undef' destinations. We prefer to thread
// blocks with known and real destinations to threading undef. We'll handle
@@ -1038,13 +971,13 @@ FindMostPopularDest(BasicBlock *BB,
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second)
DestPopularity[PredToDestList[i].second]++;
-
+
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
BasicBlock *MostPopularDest = DPI->first;
unsigned Popularity = DPI->second;
SmallVector<BasicBlock*, 4> SamePopularity;
-
+
for (++DPI; DPI != DestPopularity.end(); ++DPI) {
// If the popularity of this entry isn't higher than the popularity we've
// seen so far, ignore it.
@@ -1058,10 +991,10 @@ FindMostPopularDest(BasicBlock *BB,
SamePopularity.clear();
MostPopularDest = DPI->first;
Popularity = DPI->second;
- }
+ }
}
-
- // Okay, now we know the most popular destination. If there is more than
+
+ // Okay, now we know the most popular destination. If there is more than one
// destination, we need to determine one. This is arbitrary, but we need
// to make a deterministic decision. Pick the first one that appears in the
// successor list.
@@ -1070,105 +1003,105 @@ FindMostPopularDest(BasicBlock *BB,
TerminatorInst *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
-
+
if (std::find(SamePopularity.begin(), SamePopularity.end(),
TI->getSuccessor(i)) == SamePopularity.end())
continue;
-
+
MostPopularDest = TI->getSuccessor(i);
break;
}
}
-
+
// Okay, we have finally picked the most popular destination.
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
return false;
-
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
- if (PredValues[i].first)
- dbgs() << *PredValues[i].first;
- else
- dbgs() << "UNDEF";
- dbgs() << " for pred '" << PredValues[i].second->getName()
- << "'.\n";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
});
-
+
// Decide what we want to thread through. Convert our list of known values to
// a list of known destinations for each pred. This also discards duplicate
// predecessors and keeps track of the undefined inputs (which are represented
// as a null dest in the PredToDestList).
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
-
+
BasicBlock *OnlyDest = 0;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
-
+
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
BasicBlock *Pred = PredValues[i].second;
if (!SeenPreds.insert(Pred))
continue; // Duplicate predecessor entry.
-
+
// If the predecessor ends with an indirect goto, we can't change its
// destination.
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
-
- ConstantInt *Val = PredValues[i].first;
-
+
+ Constant *Val = PredValues[i].first;
+
BasicBlock *DestBB;
- if (Val == 0) // Undef.
+ if (isa<UndefValue>(Val))
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- DestBB = BI->getSuccessor(Val->isZero());
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- DestBB = SI->getSuccessor(SI->findCaseValue(Val));
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (i == 0)
+ if (PredToDestList.empty())
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
-
+
PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
-
+
// If all edges were unthreadable, we fail.
if (PredToDestList.empty())
return false;
-
+
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to start by threading the batch that goes
// to the most popular destination first. If we only know about one
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
-
+
if (MostPopularDest == MultipleDestSentinel)
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
-
+
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second == MostPopularDest) {
BasicBlock *Pred = PredToDestList[i].first;
-
+
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
@@ -1183,7 +1116,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
if (MostPopularDest == 0)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
-
+
// Ok, try to thread it!
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
@@ -1191,15 +1124,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
-///
+///
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
-
+
// TODO: We could make use of this to do it once for blocks with common PHI
// values.
SmallVector<BasicBlock*, 1> PredBBs;
PredBBs.resize(1);
-
+
// If any of the predecessor blocks end in an unconditional branch, we can
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
@@ -1221,21 +1154,21 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-///
+///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
-
+
// If either the LHS or RHS of the xor is a constant, don't do this
// optimization.
if (isa<ConstantInt>(BO->getOperand(0)) ||
isa<ConstantInt>(BO->getOperand(1)))
return false;
-
+
// If the first instruction in BB isn't a phi, we won't be able to infer
// anything special about any particular predecessor.
if (!isa<PHINode>(BB->front()))
return false;
-
+
// If we have a xor as the branch input to this block, and we know that the
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
// the condition into the predecessor and fix that value to true, saving some
@@ -1254,15 +1187,17 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// %Y = icmp ne i32 %A, %B
// br i1 %Z, ...
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
assert(XorOpValues.empty());
- if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
return false;
isLHS = false;
}
-
+
assert(!XorOpValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1270,29 +1205,33 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
- if (XorOpValues[i].first->isZero())
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
++NumFalse;
else
++NumTrue;
}
-
+
// Determine which value to split on, true, false, or undef if neither.
ConstantInt *SplitVal = 0;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
SplitVal = ConstantInt::getFalse(BB->getContext());
-
+
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
BlocksToFoldInto.push_back(XorOpValues[i].second);
}
-
+
// If we inferred a value for all of the predecessors, then duplication won't
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
@@ -1309,10 +1248,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// If all preds provide 1, set the computed value to 1.
BO->setOperand(!isLHS, SplitVal);
}
-
+
return true;
}
-
+
// Try to duplicate BB into PredBB.
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
@@ -1330,14 +1269,14 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
Value *IV = PN->getIncomingValueForBlock(OldPred);
-
+
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
if (I != ValueMap.end())
IV = I->second;
}
-
+
PN->addIncoming(IV, NewPred);
}
}
@@ -1345,8 +1284,8 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &PredBBs,
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
@@ -1354,7 +1293,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - would thread to self!\n");
return false;
}
-
+
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
@@ -1370,7 +1309,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
-
+
// And finally, do it! Start by factoring the predecessors is needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1381,29 +1320,29 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// And finally, do it!
DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
-
+
LVI->threadEdge(PredBB, BB, SuccBB);
-
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
DenseMap<Instruction*, Value*> ValueMapping;
-
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
- BB->getName()+".thread",
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; !isa<TerminatorInst>(BI); ++BI) {
@@ -1411,7 +1350,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[BI] = New;
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1420,15 +1359,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setOperand(i, I->second);
}
}
-
+
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
BranchInst::Create(SuccBB, NewBB);
-
+
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1446,14 +1385,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
@@ -1462,13 +1401,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
-
+
+
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
@@ -1478,12 +1417,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
BB->removePredecessor(PredBB, true);
PredTerm->setSuccessor(i, NewBB);
}
-
+
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TD);
-
+
// Threaded an edge!
++NumThreads;
return true;
@@ -1507,14 +1446,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
<< "' - it might create an irreducible loop!\n");
return false;
}
-
+
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
-
+
// And finally, do it! Start by factoring the predecessors is needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1525,35 +1464,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
-
+
// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
-
+
if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
-
+
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1575,7 +1514,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping[BI] = New;
}
}
-
+
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
@@ -1583,7 +1522,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping);
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1601,35 +1540,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
-
+
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
+
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
BB->removePredecessor(PredBB, true);
-
+
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
-
+
++NumDupes;
return true;
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index a584688..0786793 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,7 +43,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
@@ -83,7 +82,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
}
@@ -132,42 +131,7 @@ namespace {
///
bool inSubLoop(BasicBlock *BB) {
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
- for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
- if ((*I)->contains(BB))
- return true; // A subloop actually contains this block!
- return false;
- }
-
- /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
- /// specified exit block of the loop is dominated by the specified block
- /// that is in the body of the loop. We use these constraints to
- /// dramatically limit the amount of the dominator tree that needs to be
- /// searched.
- bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
- BasicBlock *BlockInLoop) const {
- // If the block in the loop is the loop header, it must be dominated!
- BasicBlock *LoopHeader = CurLoop->getHeader();
- if (BlockInLoop == LoopHeader)
- return true;
-
- DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
- DomTreeNode *IDom = DT->getNode(ExitBlock);
-
- // Because the exit block is not in the loop, we know we have to get _at
- // least_ its immediate dominator.
- IDom = IDom->getIDom();
-
- while (IDom && IDom != BlockInLoopNode) {
- // If we have got to the header of the loop, then the instructions block
- // did not dominate the exit node, so we can't hoist it.
- if (IDom->getBlock() == LoopHeader)
- return false;
-
- // Get next Immediate Dominator.
- IDom = IDom->getIDom();
- };
-
- return true;
+ return LI->getLoopFor(BB) != CurLoop;
}
/// sink - When an instruction is found to only be used outside of the loop,
@@ -481,7 +445,7 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case. It is more
// efficient to handle because there are no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -532,7 +496,7 @@ void LICM::sink(Instruction &I) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
- if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ if (!DT->dominates(InstOrigBB, ExitBlock))
continue;
// Insert the code after the last PHI node.
@@ -623,15 +587,61 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
- // For each exit block, get the DT node and walk up the DT until the
- // instruction's basic block is found or we exit the loop.
+ // Verify that the block dominates each of the exit blocks of the loop.
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
return false;
return true;
}
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ AliasSetTracker &AST;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), AST(ast) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+ // Insert stores after in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
@@ -692,8 +702,11 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (isa<LoadInst>(Use))
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
else if (isa<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
- if (Use->getOperand(0) == ASIV) return;
} else
return; // Not a load or store.
@@ -713,179 +726,43 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Changed = true;
++NumPromoted;
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ *CurAST);
- // It wants to know some value of the same type as what we'll be inserting.
- Value *SomeValue;
- if (isa<LoadInst>(LoopUses[0]))
- SomeValue = LoopUses[0];
- else
- SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
- SSA.Initialize(SomeValue->getType(), SomeValue->getName());
-
- // First step: bucket up uses of the pointers by the block they occur in.
- // This is important because we have to handle multiple defs/uses in a block
- // ourselves: SSAUpdater is purely for cross-block references.
- // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
- DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
- UsesByBlock[User->getParent()].push_back(User);
- }
-
- // Okay, now we can iterate over all the blocks in the loop with uses,
- // processing them. Keep track of which loads are loading a live-in value.
- SmallVector<LoadInst*, 32> LiveInLoads;
- DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
- Instruction *User = LoopUses[LoopUse];
- std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
-
- // If this block has already been processed, ignore this repeat use.
- if (BlockUses.empty()) continue;
-
- // Okay, this is the first use in the block. If this block just has a
- // single user in it, we can rewrite it trivially.
- if (BlockUses.size() == 1) {
- // If it is a store, it is a trivial def of the value in the block.
- if (isa<StoreInst>(User)) {
- SSA.AddAvailableValue(User->getParent(),
- cast<StoreInst>(User)->getOperand(0));
- } else {
- // Otherwise it is a load, queue it to rewrite as a live-in load.
- LiveInLoads.push_back(cast<LoadInst>(User));
- }
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, check to see if this block is all loads. If so, we can queue
- // them all as live in loads.
- bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
- HasStore = true;
- break;
- }
- }
-
- if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, we have mixed loads and stores (or just a bunch of stores).
- // Since SSAUpdater is purely for cross-block values, we need to determine
- // the order of these instructions in the block. If the first use in the
- // block is a load, then it uses the live in value. The last store defines
- // the live out value. We handle this by doing a linear scan of the block.
- BasicBlock *BB = User->getParent();
- Value *StoredValue = 0;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- // If this is a load from an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(L->getOperand(0))) continue;
-
- // If we haven't seen a store yet, this is a live in use, otherwise
- // use the stored value.
- if (StoredValue) {
- L->replaceAllUsesWith(StoredValue);
- ReplacedLoads[L] = StoredValue;
- } else {
- LiveInLoads.push_back(L);
- }
- continue;
- }
-
- if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- // If this is a store to an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(S->getOperand(1))) continue;
-
- // Remember that this is the active value in the block.
- StoredValue = S->getOperand(0);
- }
- }
-
- // The last stored value that happened is the live-out for the block.
- assert(StoredValue && "Already checked that there is a store in block");
- SSA.AddAvailableValue(BB, StoredValue);
- BlockUses.clear();
- }
-
- // Now that all the intra-loop values are classified, set up the preheader.
- // It gets a load of the pointer we're promoting, and it is the live-out value
- // from the preheader.
- LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
- Preheader->getTerminator());
+ // Set up the preheader to have a definition of the value. It is the live-out
+ // value from the preheader that uses in the loop will use.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- // Now that the preheader is good to go, set up the exit blocks. Each exit
- // block gets a store of the live-out values that feed them. Since we've
- // already told the SSA updater about the defs in the loop and the preheader
- // definition, it is all set and we can start using it.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
- Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
- new StoreInst(LiveInValue, SomePtr, InsertPos);
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ Value *SomeValue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
+ SomeValue = LI;
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
+
+ CurAST->copyValue(SomeValue, PreheaderLoad);
}
- // Okay, now we rewrite all loads that use live-in values in the loop,
- // inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
- Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
- ALoad->replaceAllUsesWith(NewVal);
- CurAST->copyValue(ALoad, NewVal);
- ReplacedLoads[ALoad] = NewVal;
- }
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
// If the preheader load is itself a pointer, we need to tell alias analysis
// about the new pointer we created in the preheader block and about any PHI
// nodes that just got inserted.
if (PreheaderLoad->getType()->isPointerTy()) {
- // Copy any value stored to or loaded from a must-alias of the pointer.
- CurAST->copyValue(SomeValue, PreheaderLoad);
-
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(SomeValue, NewPHIs[i]);
- }
-
- // Now that everything is rewritten, delete the old instructions from the body
- // of the loop. They should all be dead now.
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
-
- // If this is a load that still has uses, then the load must have been added
- // as a live value in the SSAUpdate data structure for a block (e.g. because
- // the loaded value was stored later). In this case, we need to recursively
- // propagate the updates until we get to the real value.
- if (!User->use_empty()) {
- Value *NewVal = ReplacedLoads[User];
- assert(NewVal && "not a replaced load?");
-
- // Propagate down to the ultimate replacee. The intermediately loads
- // could theoretically already have been deleted, so we don't want to
- // dereference the Value*'s.
- DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
- while (RLI != ReplacedLoads.end()) {
- NewVal = RLI->second;
- RLI = ReplacedLoads.find(NewVal);
- }
-
- User->replaceAllUsesWith(NewVal);
- CurAST->copyValue(User, NewVal);
- }
-
- CurAST->deleteValue(User);
- User->eraseFromParent();
+ CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
}
// fwew, we're done!
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index cf68ddd..753a558 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,7 +52,6 @@ namespace {
AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<DominanceFrontier>();
}
};
}
@@ -78,7 +78,6 @@ bool LoopDeletion::IsLoopDead(Loop* L,
SmallVector<BasicBlock*, 4>& exitingBlocks,
SmallVector<BasicBlock*, 4>& exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitingBlock = exitingBlocks[0];
BasicBlock* exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
@@ -88,11 +87,21 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlock);
+ Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+
+ // Make sure all exiting blocks produce the same incoming value for the exit
+ // block. If there are different incoming values for different exiting
+ // blocks, then it is impossible to statically determine which value should
+ // be used.
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
+ return false;
+ }
+
if (Instruction* I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
-
+
++BI;
}
@@ -147,10 +156,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (exitBlocks.size() != 1)
return false;
- // Loops with multiple exits are too complicated to handle correctly.
- if (exitingBlocks.size() != 1)
- return false;
-
// Finally, we have to check that the loop really is dead.
bool Changed = false;
if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
@@ -166,7 +171,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
BasicBlock* exitBlock = exitBlocks[0];
- BasicBlock* exitingBlock = exitingBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -183,31 +187,31 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
+ BasicBlock* exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
P->replaceUsesOfWith(exitingBlock, preheader);
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i)
+ P->removeIncomingValue(exitingBlocks[i]);
++BI;
}
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
DominatorTree& DT = getAnalysis<DominatorTree>();
- DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
- SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
// Move all of the block's children to be children of the preheader, which
// allows us to remove the domtree entry for the block.
- ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
- for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
- if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
}
ChildNodes.clear();
DT.eraseNode(*LI);
- if (DF) DF->removeBlock(*LI);
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 0000000..f8ce214
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,606 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases that this kicks in, it can be a significant
+// performance win.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// We should enhance this to handle negative strides through memory.
+// Alternatively (and perhaps better) we could rely on an earlier pass to force
+// forward iteration through memory, which is generally better for cache
+// behavior. Negative strides *do* happen for memset/memcpy loops.
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const TargetData *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // The trip count of the loop must be analyzable.
+ SE = &getAnalysis<ScalarEvolution>();
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD == 0) return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ // TODO: Could also handle negative stride here someday, that will require
+ // the validity check in mayLoopAccessLocation to be updated though.
+ // Enable this to print exact negative strides.
+ if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
+ dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
+ dbgs() << "BB: " << *SI->getParent();
+ }
+
+ return false;
+ }
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same stride
+ // this this may be transformable into a memcpy. This kicks in for stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday, that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. Store to &A[i] of 100 will always return may alias
+ // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
+ // which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+ // Otherwise, this isn't an idiom we can transform. For example, we can't
+ // do anything with a 3-byte store, for example.
+ return false;
+ }
+
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for an alias.
+ if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
+ return false;
+
+ // Okay, everything looks good, insert the memset.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. Just insert code for it in the preheader.
+ SCEVExpander Expander(*SE);
+
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+ // an constant array of 16-bytes. Plop the value into a mergable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ (void)NewCall;
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(TheStore, *SE);
+ ++NumMemSet;
+ return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the stored location (including the load feeding the stores).
+ // Check for an alias.
+ if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // Okay, everything looks good, insert the memcpy.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. Just insert code for it in the preheader.
+ SCEVExpander Expander(*SE);
+
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+ (void)NewCall;
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(SI, *SE);
+ ++NumMemCpy;
+ return true;
+}
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 0000000..af25c5c
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 88cb9d7..95e1578 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,16 +15,16 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#define MAX_HEADER_SIZE 16
@@ -39,14 +39,9 @@ namespace {
initializeLoopRotatePass(*PassRegistry::getPassRegistry());
}
- // Rotate Loop L as many times as possible. Return true if
- // loop is rotated at least once.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -56,27 +51,11 @@ namespace {
AU.addPreserved<ScalarEvolution>();
}
- // Helper functions
-
- /// Do actual work
- bool rotateLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool rotateLoop(Loop *L);
- /// Initialize local data
- void initialize();
-
- /// After loop rotation, loop pre-header has multiple sucessors.
- /// Insert one forwarding basic block to ensure that loop pre-header
- /// has only one successor.
- void preserveCanonicalLoopForm(LPPassManager &LPM);
-
private:
- Loop *L;
- BasicBlock *OrigHeader;
- BasicBlock *OrigPreHeader;
- BasicBlock *OrigLatch;
- BasicBlock *NewHeader;
- BasicBlock *Exit;
- LPPassManager *LPM_Ptr;
+ LoopInfo *LI;
};
}
@@ -91,48 +70,100 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
-
- bool RotatedOneLoop = false;
- initialize();
- LPM_Ptr = &LPM;
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
// One loop can be rotated multiple times.
- while (rotateLoop(Lp,LPM)) {
- RotatedOneLoop = true;
- initialize();
- }
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
- return RotatedOneLoop;
+ return MadeChange;
}
-/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
- L = Lp;
-
- OrigPreHeader = L->getLoopPreheader();
- if (!OrigPreHeader) return false;
-
- OrigLatch = L->getLoopLatch();
- if (!OrigLatch) return false;
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instruction that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
- OrigHeader = L->getHeader();
+ // The value now exits in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
-
+
+ BasicBlock *OrigHeader = L->getHeader();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
if (!L->isLoopExiting(OrigHeader))
return false;
- BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (!BI)
- return false;
- assert(BI->isConditional() && "Branch Instruction is not conditional");
-
// Updating PHInodes in loops with multiple exits adds complexity.
// Keep it simple, and restrict loop rotation to loops with one exit only.
// In future, lift this restriction and support for multiple exits if
@@ -142,24 +173,18 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
if (ExitBlocks.size() > 1)
return false;
- // Check size of original header and reject
- // loop if it is very big.
- unsigned Size = 0;
-
- // FIXME: Use common api to estimate size.
- for (BasicBlock::const_iterator OI = OrigHeader->begin(),
- OE = OrigHeader->end(); OI != OE; ++OI) {
- if (isa<PHINode>(OI))
- continue; // PHI nodes don't count.
- if (isa<DbgInfoIntrinsic>(OI))
- continue; // Debug intrinsics don't count as size.
- ++Size;
+ // Check size of original header and reject loop if it is very big.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
}
- if (Size > MAX_HEADER_SIZE)
- return false;
-
// Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+ assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
@@ -169,8 +194,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
- Exit = BI->getSuccessor(0);
- NewHeader = BI->getSuccessor(1);
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
if (L->contains(Exit))
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
@@ -186,16 +211,16 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Begin by walking OrigHeader and populating ValueMap with an entry for
// each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- DenseMap<const Value *, Value *> ValueMap;
+ ValueToValueMapTy ValueMap;
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+ ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
- TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
Instruction *Inst = I++;
@@ -207,16 +232,33 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) &&
!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst)) {
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
}
// Otherwise, create a duplicate of the instruction.
Instruction *C = Inst->clone();
- C->setName(Inst->getName());
- C->insertBefore(LoopEntryBranch);
- ValueMap[Inst] = C;
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+ // otherwise simplifyable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
}
// Along with all the other instructions, we just cloned OrigHeader's
@@ -226,221 +268,81 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
// Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
// OrigPreHeader's old terminator (the original branch into the loop), and
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
- for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
-
- // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
- // as necessary.
- SSAUpdater SSA;
- for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
- Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
-
- // The value now exits in two versions: the initial value in the preheader
- // and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
- SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
- SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
-
- // Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end(); UI != UE; ) {
- // Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
-
- // Increment the iterator before removing the use from the list.
- ++UI;
- // SSAUpdater can't handle a non-PHI use in the same block as an
- // earlier def. We can easily handle those cases manually.
- Instruction *UserInst = cast<Instruction>(U.getUser());
- if (!isa<PHINode>(UserInst)) {
- BasicBlock *UserBB = UserInst->getParent();
-
- // The original users in the OrigHeader are already using the
- // original definitions.
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped.
- if (UserBB == OrigPreHeader) {
- U = OrigPreHeaderVal;
- continue;
- }
- }
-
- // Anything else can be handled by SSAUpdater.
- SSA.RewriteUse(U);
- }
- }
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
- // Move the original header to the bottom of the loop, where it now more
- // naturally belongs. This isn't necessary for correctness, and CodeGen can
- // usually reorder blocks on its own to fix things like this up, but it's
- // still nice to keep the IR readable.
- //
- // The original header should have only one predecessor at this point, since
- // we checked that the loop had a proper preheader and unique backedge before
- // we started.
- assert(OrigHeader->getSinglePredecessor() &&
- "Original loop header has too many predecessors after loop rotation!");
- OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
-
- // Also, since this original header only has one predecessor, zap its
- // PHI nodes, which are now trivial.
- FoldSingleEntryPHINodes(OrigHeader);
-
- // TODO: We could just go ahead and merge OrigHeader into its predecessor
- // at this point, if we don't mind updating dominator info.
-
- // Establish a new preheader, update dominators, etc.
- preserveCanonicalLoopForm(LPM);
-
- ++NumRotated;
- return true;
-}
-
-/// Initialize local data
-void LoopRotate::initialize() {
- L = NULL;
- OrigHeader = NULL;
- OrigPreHeader = NULL;
- NewHeader = NULL;
- Exit = NULL;
-}
-
-/// After loop rotation, loop pre-header has multiple sucessors.
-/// Insert one forwarding basic block to ensure that loop pre-header
-/// has only one successor.
-void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
-
- // Right now original pre-header has two successors, new header and
- // exit block. Insert new block between original pre-header and
- // new header such that loop's new pre-header has only one successor.
- BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
- "bb.nph",
- OrigHeader->getParent(),
- NewHeader);
- LoopInfo &LI = getAnalysis<LoopInfo>();
- if (Loop *PL = LI.getLoopFor(OrigPreHeader))
- PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
- BranchInst::Create(NewHeader, NewPreHeader);
- BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
- if (OrigPH_BI->getSuccessor(0) == NewHeader)
- OrigPH_BI->setSuccessor(0, NewPreHeader);
- else {
- assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
- "Unexpected original pre-header terminator");
- OrigPH_BI->setSuccessor(1, NewPreHeader);
- }
-
- PHINode *PN;
- for (BasicBlock::iterator I = NewHeader->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
- int index = PN->getBasicBlockIndex(OrigPreHeader);
- assert(index != -1 && "Expected incoming value from Original PreHeader");
- PN->setIncomingBlock(index, NewPreHeader);
- assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
- "Expected only one incoming value from Original PreHeader");
- }
-
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- DT->addNewBlock(NewPreHeader, OrigPreHeader);
- DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
- DT->changeImmediateDominator(Exit, OrigPreHeader);
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (L->getHeader() != B) {
- DomTreeNode *Node = DT->getNode(B);
- if (Node && Node->getBlock() == OrigHeader)
- DT->changeImmediateDominator(*BI, L->getHeader());
- }
- }
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
- }
-
- if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
- // New Preheader's dominance frontier is Exit block.
- DominanceFrontier::DomSetType NewPHSet;
- NewPHSet.insert(Exit);
- DF->addBasicBlock(NewPreHeader, NewPHSet);
-
- // New Header's dominance frontier now includes itself and Exit block
- DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
- if (HeadI != DF->end()) {
- DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
- HeaderSet.clear();
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType HeaderSet;
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), HeaderSet);
- }
-
- // Original header (new Loop Latch)'s dominance frontier is Exit.
- DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
- if (LatchI != DF->end()) {
- DominanceFrontier::DomSetType &LatchSet = LatchI->second;
- LatchSet = LatchI->second;
- LatchSet.clear();
- LatchSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType LatchSet;
- LatchSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), LatchSet);
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Since OrigPreheader now has the conditional branch to Exit block, it is
+ // the dominator of Exit.
+ DT->changeImmediateDominator(Exit, OrigPreheader);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
-
- // If a loop block dominates new loop latch then add to its frontiers
- // new header and Exit and remove new latch (which is equal to original
- // header).
- BasicBlock *NewLatch = L->getLoopLatch();
-
- assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
-
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore. Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only one
+ // predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader, this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst::Create(NewHeader, PHBI);
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (DT->dominates(B, NewLatch)) {
- DominanceFrontier::iterator BDFI = DF->find(B);
- if (BDFI != DF->end()) {
- DominanceFrontier::DomSetType &BSet = BDFI->second;
- BSet.erase(NewLatch);
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType BSet;
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- DF->addBasicBlock(B, BSet);
- }
- }
- }
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
}
-
- // Preserve canonical loop form, which means Exit block should
- // have only one predecessor.
- SplitEdge(L->getLoopLatch(), Exit, this);
-
- assert(NewHeader && L->getHeader() == NewHeader &&
- "Invalid loop header after loop rotation");
- assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
- "Invalid loop preheader after loop rotation");
- assert(L->getLoopLatch() &&
- "Invalid loop latch after loop rotation");
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ ++NumRotated;
+ return true;
}
+
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3a4108a..ac4aea2 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -63,6 +63,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -210,8 +211,7 @@ struct Formula {
Formula() : ScaledReg(0) {}
- void InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
const Type *getType() const;
@@ -232,9 +232,9 @@ struct Formula {
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE) {
// Collect expressions which properly dominate the loop header.
- if (S->properlyDominates(L->getHeader(), &DT)) {
+ if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
@@ -243,18 +243,18 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
- DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- L, Good, Bad, SE, DT);
+ L, Good, Bad, SE);
return;
}
@@ -266,7 +266,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
- DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
@@ -286,11 +286,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
- DoInitialMatch(S, L, Good, Bad, SE, DT);
+ DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
@@ -730,7 +729,7 @@ void Cost::RateRegister(const SCEV *Reg,
++SetupCost;
NumIVMuls += isa<SCEVMulExpr>(Reg) &&
- Reg->hasComputableLoopEvolution(L);
+ SE.hasComputableLoopEvolution(Reg, L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -2056,7 +2055,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (N->isLoopInvariant(L)) {
+ if (SE.isLoopInvariant(N, L)) {
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -2096,7 +2095,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
- F.InitialMatch(S, L, SE, DT);
+ F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
@@ -2196,7 +2195,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
- if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
@@ -2279,7 +2278,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Don't pull a constant into a register if the constant could be folded
@@ -2330,8 +2329,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
- if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
- !BaseReg->hasComputableLoopEvolution(L))
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
@@ -3545,21 +3544,23 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
@@ -3815,7 +3816,6 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved("domfrontier");
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
@@ -3824,6 +3824,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 65e576b..80b263a 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,7 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -99,21 +99,6 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
unsigned LoopSize = Metrics.NumInsts;
- // If we can identify the induction variable, we know that it will become
- // constant when we unroll the loop, so factor that into our loop size
- // estimate.
- // FIXME: We have to divide by InlineConstants::InstrCost because the
- // measure returned by CountCodeReductionForConstant is not an instruction
- // count, but rather a weight as defined by InlineConstants. It would
- // probably be a good idea to standardize on a single weighting scheme by
- // pushing more of the logic for weighting into CodeMetrics.
- if (PHINode *IndVar = L->getCanonicalInductionVariable()) {
- unsigned SizeDecrease = Metrics.CountCodeReductionForConstant(IndVar);
- // NOTE: Because SizeDecrease is a fuzzy estimate, we don't want to allow
- // it to totally negate the cost of unrolling a loop.
- SizeDecrease = SizeDecrease > LoopSize / 2 ? LoopSize / 2 : SizeDecrease;
- }
-
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
// not a problem for code quality.
@@ -123,7 +108,6 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
-
LoopInfo *LI = &getAnalysis<LoopInfo>();
BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 009ee7b..b4e3d31 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,12 +32,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -111,6 +111,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
}
private:
@@ -458,19 +459,6 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
return true;
}
-// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by VMap.
-//
-static inline void RemapInstruction(Instruction *I,
- ValueToValueMapTy &VMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
- ValueToValueMapTy::iterator It = VMap.find(Op);
- if (It != VMap.end()) Op = It->second;
- I->setOperand(op, Op);
- }
-}
-
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
@@ -588,6 +576,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
LoopBlocks.clear();
NewBlocks.clear();
@@ -665,7 +656,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -969,13 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
-
- // Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I)) {
- ReplaceUsesOfWith(I, C, Worklist, L, LPM);
- continue;
- }
-
+
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
@@ -990,15 +975,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT)) {
- ReplaceUsesOfWith(I, V, Worklist, L, LPM);
- continue;
- }
-
+ if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
// Special case hacks that appear commonly in unswitched code.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (BI->isUnconditional()) {
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index f4876ea..bde0e53 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,16 +14,18 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include <list>
@@ -32,62 +34,10 @@ using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
-
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
- LLVMContext &Context = V->getContext();
-
- // All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
-
- // Constant float and double values can be handled as integer values if the
- // corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
- // Don't handle long double formats, which have strange constraints.
- }
-
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2.trunc(Val.getBitWidth()/2);
- Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return 0;
- }
- return ConstantInt::get(Context, Val);
- }
- }
-
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
- return 0;
-}
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
- bool &VariableIdxFound, TargetData &TD) {
+ bool &VariableIdxFound, const TargetData &TD){
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -120,14 +70,31 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- TargetData &TD) {
+ const TargetData &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
// offset, which determines their offset from each other. At this point, we
// handle no other case.
- GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
- GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
@@ -137,7 +104,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- bool VariableIdxFound = false;
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
@@ -171,7 +137,7 @@ struct MemsetRange {
unsigned Alignment;
/// TheStores - The actual stores that make up this range.
- SmallVector<StoreInst*, 16> TheStores;
+ SmallVector<Instruction*, 16> TheStores;
bool isProfitableToUseMemset(const TargetData &TD) const;
@@ -181,10 +147,19 @@ struct MemsetRange {
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If we found more than 8 stores to merge or 64 bytes, use memset.
if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
- if (TheStores.size() <= 2) return false;
+ if (TheStores.size() == 2) return false;
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -215,31 +190,53 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- TargetData &TD;
+ const TargetData &TD;
public:
- MemsetRanges(TargetData &td) : TD(td) {}
+ MemsetRanges(const TargetData &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
- void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
};
} // end anon namespace
-/// addStore - Add a new store to the MemsetRanges data structure. This adds a
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
- int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
- // Do a linear search of the ranges to see if this can be joined and/or to
- // find the insertion point in the list. We keep the ranges sorted for
- // simplicity here. This is a linear search of a linked list, which is ugly,
- // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
@@ -252,14 +249,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
- R.StartPtr = SI->getPointerOperand();
- R.Alignment = SI->getAlignment();
- R.TheStores.push_back(SI);
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
return;
}
-
+
// This store overlaps with I, add it.
- I->TheStores.push_back(SI);
+ I->TheStores.push_back(Inst);
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
@@ -274,8 +271,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
// stopped on *it*.
if (Start < I->Start) {
I->Start = Start;
- I->StartPtr = SI->getPointerOperand();
- I->Alignment = SI->getAlignment();
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
}
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -301,13 +298,17 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
namespace {
class MemCpyOpt : public FunctionPass {
- bool runOnFunction(Function &F);
+ MemoryDependenceAnalysis *MD;
+ const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
}
+ bool runOnFunction(Function &F);
+
private:
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -321,10 +322,17 @@ namespace {
// Helper fuctions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
uint64_t cpyLen, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
bool iterateOnFunction(Function &F);
};
@@ -342,183 +350,182 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-/// processStore - When GVN is scanning forward over instructions, we look for
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consequtive ones
-/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
-
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
-
- // Detect cases where we're performing call slot forwarding, but
- // happen to be using a load-store pair to implement it, rather than
- // a memcpy.
- if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse()) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- MemDepResult dep = MD.getDependency(LI);
- CallInst *C = 0;
- if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
- C = dyn_cast<CallInst>(dep.getInst());
-
- if (C) {
- bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
- if (changed) {
- MD.removeInstruction(SI);
- SI->eraseFromParent();
- LI->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- }
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
- LLVMContext &Context = SI->getContext();
-
- // There are two cases that are interesting for this code to handle: memcpy
- // and memset. Right now we only handle memset.
-
- // Ensure that the value being stored is something that can be memset'able a
- // byte at a time like "0" or "-1" or any width, as well as things like
- // 0xA0A0A0A0 and 0.0.
- Value *ByteVal = isBytewiseValue(SI->getOperand(0));
- if (!ByteVal)
- return false;
-
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- Module *M = SI->getParent()->getParent()->getParent();
-
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
MemsetRanges Ranges(*TD);
- Value *StartPtr = SI->getPointerOperand();
-
- BasicBlock::iterator BI = SI;
+ BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
- if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
- // If the call is readnone, ignore it, otherwise bail out. We don't even
- // allow readonly here because we don't want something like:
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite(BI)) ==
- AliasAnalysis::DoesNotAccessMemory)
- continue;
-
- // TODO: If this is a memset, try to join it in.
-
- break;
- } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
- break;
-
- // If this is a non-store instruction it is fine, ignore it.
- StoreInst *NextStore = dyn_cast<StoreInst>(BI);
- if (NextStore == 0) continue;
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
- // If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
- // Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
- break;
-
- // Check to see if this store is to a constant offset from the start ptr.
- int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
- break;
-
- Ranges.addStore(Offset, NextStore);
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
}
-
+
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return false;
+ return 0;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
- Ranges.addStore(0, SI);
-
-
+ Ranges.addInst(0, StartInst);
+
+ // If we create any memsets, we put it right before the first instruction that
+ // isn't part of the memset block. This ensure that the memset is dominated
+ // by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- bool MadeChange = false;
+ Instruction *AMemSet = 0;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
-
+
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(*TD))
continue;
- // Otherwise, we do want to transform this! Create a new memset. We put
- // the memset right before the first instruction that isn't part of this
- // memset block. This ensure that the memset is dominated by any addressing
- // instruction needed by the start of the block.
- BasicBlock::iterator InsertPt = BI;
-
+ // Otherwise, we do want to transform this! Create a new memset.
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
const Type *EltType =
- cast<PointerType>(StartPtr->getType())->getElementType();
+ cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
-
- // Cast the start ptr to be i8* as memset requires.
- const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
- const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
- StartPTy->getAddressSpace());
- if (StartPTy!= i8Ptr)
- StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
- InsertPt);
-
- Value *Ops[] = {
- StartPtr, ByteVal, // Start, value
- // size
- ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
- // align
- ConstantInt::get(Type::getInt32Ty(Context), Alignment),
- // volatile
- ConstantInt::get(Type::getInt1Ty(Context), 0),
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
- Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
- Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i];
- dbgs() << "With: " << *C); C=C;
-
- // Don't invalidate the iterator
- BBI = BI;
-
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
// Zap all the stores.
- for (SmallVector<StoreInst*, 16>::const_iterator
+ for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI)
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
(*SI)->eraseFromParent();
+ }
++NumMemSetInfer;
- MadeChange = true;
}
- return MadeChange;
+ return AMemSet;
+}
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse()) {
+ MemDepResult dep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+ C = dyn_cast<CallInst>(dep.getInst());
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'able a
+ // byte at a time like "0" or "-1" or any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
}
@@ -552,8 +559,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
+ if (TD == 0) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -631,8 +637,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpyDest, srcSize) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -655,113 +660,142 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- MD.removeInstruction(C);
+ MD->removeInstruction(C);
- // Remove the memcpy
- MD.removeInstruction(cpy);
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
++NumMemCpyInstr;
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
-/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
-/// B to be a memcpy from X to Z (or potentially a memmove, depending on
-/// circumstances). This allows later passes to remove the first memcpy
-/// altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // We can only optimize statically-sized memcpy's.
- ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
- if (!cpyLen) return false;
-
- // The are two possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- MemDepResult dep = MD.getDependency(M);
- if (!dep.isClobber())
- return false;
- if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
- bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
- cpyLen->getZExtValue(), C);
- if (changed) M->eraseFromParent();
- return changed;
- }
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+ // We can only transforms memcpy's where the dest of one is the source of the
+ // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
- }
- MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
-
- // We can only transforms memcpy's where the dest of one is the source of the
- // other
- if (M->getSource() != MDep->getDest())
+ // If dep instruction is reading from our current input, then it is a noop
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
return false;
// Second, the length of the memcpy's must be the same, or the preceeding one
// must be larger than the following one.
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
- if (!C1 || !C2)
- return false;
-
- uint64_t DepSize = C1->getValue().getZExtValue();
- uint64_t CpySize = C2->getValue().getZExtValue();
-
- if (DepSize < CpySize)
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- // Finally, we have to make sure that the dest of the second does not
- // alias the source of the first
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
- AliasAnalysis::NoAlias)
- return false;
- else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
- AliasAnalysis::NoAlias)
- return false;
- else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
- != AliasAnalysis::NoAlias)
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
- // If all checks passed, then we can transform these memcpy's
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- MDep->getRawSource()->getType(),
- M->getLength()->getType() };
- Function *MemCpyFun = Intrinsic::getDeclaration(
- M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), ArgTys, 3);
-
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
// Make sure to use the lesser of the alignment of the source and the dest
// since we're changing where we're reading from, but don't want to increase
// the alignment past what can be read from or written to.
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
- unsigned Align = std::min(MDep->getAlignmentCst()->getZExtValue(),
- M->getAlignmentCst()->getZExtValue());
- LLVMContext &Context = M->getContext();
- ConstantInt *AlignCI = ConstantInt::get(Type::getInt32Ty(Context), Align);
- Value *Args[5] = {
- M->getRawDest(), MDep->getRawSource(), M->getLength(),
- AlignCI, M->getVolatileCst()
- };
- CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-
- // If C and M don't interfere, then this is a valid transformation. If they
- // did, this would mean that the two sources overlap, which would be bad.
- if (MD.getDependency(C) == dep) {
- MD.removeInstruction(M);
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
M->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
+ return false;
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+ // The are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
+ return false;
+
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
- // Otherwise, there was no point in doing this, so we remove the call we
- // inserted and act like nothing happened.
- MD.removeInstruction(C);
- C->eraseFromParent();
return false;
}
@@ -770,15 +804,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- // If the memmove is a constant size, use it for the alias query, this allows
- // us to optimize things like: memmove(P, P+64, 64);
- uint64_t MemMoveSize = AliasAnalysis::UnknownSize;
- if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
- MemMoveSize = Len->getZExtValue();
-
// See if the pointers alias.
- if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
- AliasAnalysis::NoAlias)
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -793,33 +820,107 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
}
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, see if we can byval from the source of the memcpy instead of the
+ // result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+ // The length of the memcpy must be larger or equal to the size of the byval.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+ // Get the alignment of the byval. If it is greater than the memcpy, then we
+ // can't do the substitution. If the call doesn't specify the alignment, then
+ // it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the second memcpy into foo(*b).
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
// Walk all instruction in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE;) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
+ bool RepeatInstruction = false;
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
- MadeChange |= processMemCpy(M);
- else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
- if (processMemMove(M)) {
- --BI; // Reprocess the new memcpy.
- MadeChange = true;
- }
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
}
}
}
@@ -832,14 +933,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
//
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
+ MD = 0;
return MadeChange;
}
-
-
-
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 46b7f95..e093b52 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -240,6 +240,12 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
RHS->setOperand(0, LHS);
I->setOperand(0, RHS);
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+ LHS->clearSubclassOptionalData();
+ RHS->clearSubclassOptionalData();
+
++NumLinear;
MadeChange = true;
DEBUG(dbgs() << "Linearized: " << *I << '\n');
@@ -341,6 +347,12 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(0, Ops[i].Op);
I->setOperand(1, Ops[i+1].Op);
+
+ // Clear all the optional flags, which may not hold after the
+ // reassociation if the expression involved more than just this operation.
+ if (Ops.size() != 2)
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -356,6 +368,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (I->getOperand(1) != Ops[i].Op) {
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(1, Ops[i].Op);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -811,16 +828,23 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
SmallVector<Value*, 4> NewMulOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
continue;
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
- NewMulOps.push_back(V);
- Ops.erase(Ops.begin()+i);
- --i; --e;
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
}
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 621508f..c82e929 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -481,6 +481,19 @@ private:
}
}
+ /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
+ /// map for I and PN, but if one is there already, do not create another.
+ /// (Duplicate entries do not break anything directly, but can lead to
+ /// exponential growth of the table in rare cases.)
+ void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ std::multimap<PHINode*, Instruction*>::iterator J, E;
+ tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (; J != E; ++J)
+ if (J->second == I)
+ return;
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+ }
+
private:
friend class InstVisitor<SCCPSolver>;
@@ -973,9 +986,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
if (Result.isConstant()) {
markConstant(IV, &I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
- // operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1056,8 +1069,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markConstant(&I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
// operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1593,10 +1606,6 @@ namespace {
// algorithm, and return true if the function was modified.
//
bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- }
};
} // end anonymous namespace
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index c015093..678ef0d 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -34,25 +34,27 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDCEPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeDSEPass(Registry);
- initializeGEPSplitterPass(Registry);
initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
initializeIndVarSimplifyPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLICMPass(Registry);
initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
- initializeSROAPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeSimplifyHalfPowrLibCallsPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
initializeTailDupPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8b4f355..c3ca852 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -31,29 +31,34 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(ID) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
+ SROA(int T, bool hasDT, char &ID)
+ : FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
SRThreshold = 128;
else
@@ -65,17 +70,10 @@ namespace {
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
- // getAnalysisUsage - This pass does not require any passes, but we know it
- // will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.setPreservesCFG();
- }
-
private:
+ bool HasDomTree;
TargetData *TD;
-
+
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
SmallVector<Value*, 32> DeadInsts;
@@ -84,39 +82,61 @@ namespace {
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
-
+
/// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1;
/// isMemCpyDst - This is true if this aggregate is memcpy'd into.
bool isMemCpyDst : 1;
- AllocaInfo()
- : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or load/store that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
};
-
+
unsigned SRThreshold;
- void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
bool isSafeAllocaToScalarRepl(AllocaInst *AI);
- void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- AllocaInfo &Info);
- void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
- AllocaInfo &Info);
- void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
const Type *&IdxTy);
-
- void DoScalarReplacement(AllocaInst *AI,
+
+ void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -130,22 +150,63 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
-
+
static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1) : SROA(T, true, ID) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
}
-char SROA::ID = 0;
-INITIALIZE_PASS_BEGIN(SROA, "scalarrepl",
- "Scalar Replacement of Aggregates", false, false)
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
-INITIALIZE_PASS_END(SROA, "scalarrepl",
- "Scalar Replacement of Aggregates", false, false)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
// Public interface to the ScalarReplAggregates pass
-FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
- return new SROA(Threshold);
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold);
+ return new SROA_SSAUp(Threshold);
}
@@ -161,16 +222,16 @@ class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered.
unsigned AllocaSize;
const TargetData &TD;
-
+
/// IsNotTrivial - This is set to true if there is some access to the object
/// which means that mem2reg can't promote it.
bool IsNotTrivial;
-
+
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
const Type *VectorTy;
-
+
/// HadAVector - True if there is at least one vector access to the alloca.
/// We don't want to turn random arrays into vectors and use vector element
/// insert/extract, but if there are element accesses to something that is
@@ -184,14 +245,14 @@ public:
VectorTy = 0;
HadAVector = false;
}
-
+
AllocaInst *TryConvert(AllocaInst *AI);
-
+
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
void MergeInType(const Type *In, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
-
+
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
@@ -200,26 +261,6 @@ private:
} // end anonymous namespace.
-/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
-/// allowed to form. We do this to avoid MMX types, which is a complete hack,
-/// but is required until the backend is fixed.
-static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
- StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
- if (!Triple.startswith("i386") &&
- !Triple.startswith("x86_64"))
- return false;
-
- // Reject all the MMX vector types.
- switch (VTy->getNumElements()) {
- default: return false;
- case 1: return VTy->getElementType()->isIntegerTy(64);
- case 2: return VTy->getElementType()->isIntegerTy(32);
- case 4: return VTy->getElementType()->isIntegerTy(16);
- case 8: return VTy->getElementType()->isIntegerTy(8);
- }
-}
-
-
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -228,7 +269,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// out.
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
return 0;
-
+
// If we were able to find a vector type that can handle this with
// insert/extract elements, and if there was at least one use that had
// a vector type, promote this to a vector. We don't want to promote
@@ -236,8 +277,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
- !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -268,7 +308,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
return;
-
+
// If this could be contributing to a vector, analyze it.
// If the In type is a vector that is the same size as the alloca, see if it
@@ -276,7 +316,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
// Remember if we saw a vector type.
HadAVector = true;
-
+
if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
// If we're storing/loading a vector of the right size, allow it as a
// vector. If this the first vector we see, remember the type so that
@@ -295,7 +335,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// compatible with it.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
+ (VectorTy == 0 ||
cast<VectorType>(VectorTy)->getElementType()
->getPrimitiveSizeInBits()/8 == EltSize)) {
if (VectorTy == 0)
@@ -303,7 +343,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
return;
}
}
-
+
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
VectorTy = Type::getVoidTy(In->getContext());
@@ -321,7 +361,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile loads.
if (LI->isVolatile())
@@ -332,7 +372,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
MergeInType(LI->getType(), Offset);
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || SI->isVolatile()) return false;
@@ -342,7 +382,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
MergeInType(SI->getOperand(0)->getType(), Offset);
continue;
}
-
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
@@ -354,7 +394,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a GEP with a variable indices, we can't handle it.
if (!GEP->hasAllConstantIndices())
return false;
-
+
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
@@ -383,15 +423,15 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
-
+
IsNotTrivial = true; // Can't be mem2reg'd.
continue;
}
-
+
// Otherwise, we cannot handle this!
return false;
}
-
+
return true;
}
@@ -422,9 +462,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
GEP->eraseFromParent();
continue;
}
-
- IRBuilder<> Builder(User->getParent(), User);
-
+
+ IRBuilder<> Builder(User);
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
@@ -434,7 +474,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LI->eraseFromParent();
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
@@ -442,14 +482,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
-
+
// If the load we just inserted is now dead, then the inserted store
// overwrote the entire thing.
if (Old->use_empty())
Old->eraseFromParent();
continue;
}
-
+
// If this is a constant sized memset of a constant value (e.g. 0) we can
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
@@ -457,7 +497,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if (NumBytes != 0) {
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
+
// Compute the value replicated the right number of times.
APInt APVal(NumBytes*8, Val);
@@ -465,17 +505,17 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (Val)
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
-
+
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
-
+
// If the load we just inserted is now dead, then the memset overwrote
// the entire thing.
if (Old->use_empty())
- Old->eraseFromParent();
+ Old->eraseFromParent();
}
MSI->eraseFromParent();
continue;
@@ -485,29 +525,42 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
-
+
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
-
- if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
-
+ const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
NewStore->setAlignment(MTI->getAlignment());
} else {
@@ -517,7 +570,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
MTI->eraseFromParent();
continue;
}
-
+
llvm_unreachable("Unsupported operation!");
}
}
@@ -559,7 +612,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
V = Builder.CreateBitCast(V, ToType, "tmp");
return V;
}
-
+
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (const StructType *ST = dyn_cast<StructType>(ToType)) {
@@ -573,7 +626,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
}
return Res;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
@@ -609,7 +662,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
ConstantInt::get(FromVal->getType(),
ShAmt), "tmp");
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateShl(FromVal,
+ FromVal = Builder.CreateShl(FromVal,
ConstantInt::get(FromVal->getType(),
-ShAmt), "tmp");
@@ -617,11 +670,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
- Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
- Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
@@ -658,7 +711,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
-
+
// Changing the whole vector with memset or with an access of a different
// vector type?
if (ValSize == VecSize)
@@ -668,28 +721,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Must be an element insertion.
unsigned Elt = Offset/EltSize;
-
+
if (SV->getType() != VTy->getElementType())
SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-
- SV = Builder.CreateInsertElement(Old, SV,
+
+ SV = Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
return SV;
}
-
+
// If SV is a first-class aggregate value, insert each value recursively.
if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
- Old = ConvertScalar_InsertValue(Elt, Old,
+ Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
}
return Old;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
@@ -789,16 +842,298 @@ bool SROA::runOnFunction(Function &F) {
return Changed;
}
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
+ : LoadAndStorePromoter(Insts, S), AI(0) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the result, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // Both operands to the select need to be dereferencable, either absolutely
+ // (e.g. allocas) or at this point because we can see other accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a select if its only uses are loads and if the operand to
+/// the select can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+ // blocks. The only thing to watch out for is that we can't put a possibly
+ // trapping load in the predecessor if it is a critical edge.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the InVal is an invoke in the pred, we can't put a load on the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == Pred)
+ return false;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || SI->isVolatile())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ // If the condition being selected on is a constant, fold the select, yes
+ // this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+ // This is very rare and we just scrambled the use list of AI, start
+ // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+ // If we have instructions that need to be rewritten for this to be promotable
+ // take care of it now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ, it doesn't matter.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
bool Changed = false;
-
+ SmallVector<Instruction*, 64> Insts;
while (1) {
Allocas.clear();
@@ -806,12 +1141,27 @@ bool SROA::performPromotion(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
+ if (tryToMakeAllocaBePromotable(AI, TD))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+
+ AllocaPromoter(Insts, SSA).run(AI, Insts);
+ Insts.clear();
+ }
+ }
NumPromoted += Allocas.size();
Changed = true;
}
@@ -853,7 +1203,7 @@ bool SROA::performScalarRepl(Function &F) {
while (!WorkList.empty()) {
AllocaInst *AI = WorkList.back();
WorkList.pop_back();
-
+
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// with unused elements.
if (AI->use_empty()) {
@@ -865,7 +1215,7 @@ bool SROA::performScalarRepl(Function &F) {
// If this alloca is impossible for us to promote, reject it early.
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
-
+
// Check to see if this allocation is only modified by a memcpy/memmove from
// a constant global. If this is the case, we can change all users to use
// the constant global instead. This is commonly produced by the CFE by
@@ -882,7 +1232,7 @@ bool SROA::performScalarRepl(Function &F) {
Changed = true;
continue;
}
-
+
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -891,10 +1241,10 @@ bool SROA::performScalarRepl(Function &F) {
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
-
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
-
+
// If the alloca looks like a good candidate for scalar replacement, and if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
@@ -917,8 +1267,8 @@ bool SROA::performScalarRepl(Function &F) {
++NumConverted;
Changed = true;
continue;
- }
-
+ }
+
// Otherwise, couldn't process this alloca.
}
@@ -927,14 +1277,14 @@ bool SROA::performScalarRepl(Function &F) {
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocaInst *AI,
+void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -982,48 +1332,106 @@ void SROA::DeleteDeadInstructions() {
I->eraseFromParent();
}
}
-
+
/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI. The results are flagged in
/// the Info parameter. Offset indicates the position within AI that is
/// referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
- isSafeForScalarRepl(BC, AI, Offset, Info);
+ isSafeForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
uint64_t GEPOffset = Offset;
- isSafeGEP(GEPI, AI, GEPOffset, Info);
+ isSafeGEP(GEPI, GEPOffset, Info);
if (!Info.isUnsafe)
- isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length)
- isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info);
- else
- MarkUnsafe(Info);
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer
+/// derived from the alloca, we can often still split the alloca into elements.
+/// This is useful if we have a large alloca where one element is phi'd
+/// together somewhere: we can SRoA and promote all the other elements even if
+/// we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single load and stores
+/// that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (!LI->isVolatile()) {
- const Type *LIType = LI->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
- LIType, false, Info);
- } else
- MarkUnsafe(Info);
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (!SI->isVolatile() && SI->getOperand(0) != I) {
- const Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
- SIType, true, Info);
- } else
- MarkUnsafe(Info);
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- MarkUnsafe(Info);
+ return MarkUnsafe(Info, User);
}
if (Info.isUnsafe) return;
}
@@ -1034,7 +1442,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
/// references, and when the resulting offset corresponds to an element within
/// the alloca type. The results are flagged in the Info parameter. Upon
/// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
uint64_t &Offset, AllocaInfo &Info) {
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
if (GEPIt == E)
@@ -1049,7 +1457,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
if (!IdxVal)
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
@@ -1057,40 +1465,92 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
&Indices[0], Indices.size());
- if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
- MarkUnsafe(Info);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+ const Type *&EltTy) {
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ const Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
}
/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it. The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
const Type *MemOpType, bool isStore,
- AllocaInfo &Info) {
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
- if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
- bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
- // This is safe for MemIntrinsics (where MemOpType is 0), integer types
- // (which are essentially the same as the MemIntrinsics, especially with
- // regard to copying padding between elements), or references using the
- // aggregate type of the alloca.
- if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
- if (!UsesAggregateType) {
- if (isStore)
- Info.isMemCpyDst = true;
- else
- Info.isMemCpySrc = true;
- }
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
return;
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize))
+ const Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
return;
+ }
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, TheAccess);
}
/// TypeHasComponent - Return true if T has a component type with the
@@ -1127,14 +1587,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
@@ -1142,9 +1609,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
const Type *LIType = LI->getType();
- if (LIType == AI->getAllocatedType()) {
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
// %res = load { i32, i32 }* %alloc
// with:
@@ -1166,10 +1637,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
const Type *SIType = Val->getType();
- if (SIType == AI->getAllocatedType()) {
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
// with:
@@ -1189,6 +1663,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
}
}
}
@@ -1316,7 +1810,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// function is only called for mem intrinsics that access the whole
// aggregate, so non-zero GEPs are not an issue here.)
OtherPtr = OtherPtr->stripPointerCasts();
-
+
// Copying the alloca to itself is a no-op: just delete it.
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
@@ -1327,28 +1821,26 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
DeadInsts.push_back(MI);
return;
}
-
+
// If the pointer is not the right type, insert a bitcast to the right
// type.
const Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
-
+
if (OtherPtr->getType() != NewTy)
OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
}
-
+
// Process each element of the aggregate.
- Value *TheFn = MI->getCalledValue();
- const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == Inst;
-
+
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
Value *OtherElt = 0;
unsigned OtherEltAlign = MemAlignment;
-
+
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
@@ -1364,7 +1856,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
-
+
// The alignment of the other pointer is the guaranteed alignment of the
// element, which is affected by both the known alignment of the whole
// mem intrinsic and the alignment of the element. If the alignment of
@@ -1372,10 +1864,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// known alignment is just 4 bytes.
OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
}
-
+
Value *EltPtr = NewElts[i];
const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
-
+
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
if (isa<MemTransferInst>(MI)) {
@@ -1391,7 +1883,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
continue;
}
assert(isa<MemSetInst>(MI));
-
+
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
@@ -1411,7 +1903,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
TotalVal = TotalVal.shl(8);
TotalVal |= OneVal;
}
-
+
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
if (ValTy->isPointerTy())
@@ -1419,12 +1911,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
-
+
// If the requested value was a vector constant, create it.
if (EltTy != ValTy) {
unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ StoreVal = ConstantVector::get(Elts);
}
}
new StoreInst(StoreVal, EltPtr, MI);
@@ -1433,55 +1925,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// Otherwise, if we're storing a byte variable, use a memset call for
// this element.
}
-
- // Cast the element pointer to BytePtrTy.
- if (EltPtr->getType() != BytePtrTy)
- EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-
- // Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy) {
- // Preserve address space of OtherElt
- const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
- const PointerType* PTy = cast<PointerType>(BytePtrTy);
- if (OtherPTy->getElementType() != PTy->getElementType()) {
- Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
- OtherPTy->getAddressSpace());
- OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
- OtherElt->getName(), MI);
- }
- }
-
+
unsigned EltSize = TD->getTypeAllocSize(EltTy);
-
+
+ IRBuilder<> Builder(MI);
+
// Finally, insert the meminst for this element.
- if (isa<MemTransferInst>(MI)) {
- Value *Ops[] = {
- SROADest ? EltPtr : OtherElt, // Dest ptr
- SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- // Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
- MI->getVolatileCst()
- };
- // In case we fold the address space overloaded memcpy of A to B
- // with memcpy of B to C, change the function to be a memcpy of A to C.
- const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
- Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
} else {
- assert(isa<MemSetInst>(MI));
- Value *Ops[] = {
- EltPtr, MI->getArgOperand(1), // Dest, Value,
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- Zero, // Align
- ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
}
}
DeadInsts.push_back(MI);
@@ -1497,12 +1958,13 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *SrcVal = SI->getOperand(0);
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- SrcVal = new ZExtInst(SrcVal,
- IntegerType::get(SI->getContext(), AllocaSizeBits),
- "", SI);
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
<< '\n');
@@ -1511,47 +1973,44 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// have different ways to compute the element offset.
if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
const Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
+
if (FieldSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(), FieldSizeBits),
- "", SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
} else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
}
-
+
} else {
const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
const Type *ArrayEltTy = ATy->getElementType();
@@ -1559,50 +2018,48 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
- else
+ else
Shift = 0;
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Ignore zero sized fields like {}, they obviously contain no data.
if (ElementSizeBits == 0) continue;
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
if (ElementSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(),
- ElementSizeBits),"",SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
} else if (ArrayEltTy->isFloatingPointTy() ||
ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
-
+
if (TD->isBigEndian())
Shift -= ElementOffset;
- else
+ else
Shift += ElementOffset;
}
}
-
+
DeadInsts.push_back(SI);
}
@@ -1614,10 +2071,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// and form the result value.
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
-
+
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
-
+
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
@@ -1627,11 +2084,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
} else {
const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
- }
-
- Value *ResultVal =
+ }
+
+ Value *ResultVal =
Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
@@ -1639,11 +2096,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -1661,17 +2118,17 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// we can shift and insert it.
if (SrcField->getType() != ResultVal->getType())
SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
-
+
// Determine the number of bits to shift SrcField.
uint64_t Shift;
if (Layout) // Struct case.
Shift = Layout->getElementOffsetInBits(i);
else // Array case.
Shift = i*ArrayEltBitOffset;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
-
+
if (Shift) {
Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
@@ -1694,46 +2151,39 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
/// HasPadding - Return true if the specified type has any structure or
-/// alignment padding, false otherwise.
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
- return HasPadding(ATy->getElementType(), TD);
-
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return HasPadding(VTy->getElementType(), TD);
-
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TD.getStructLayout(STy);
- unsigned PrevFieldBitOffset = 0;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
-
- // Padding in sub-elements?
- if (HasPadding(STy->getElementType(i), TD))
- return true;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
- // Check to see if there is any padding between this element and the
- // previous one.
- if (i) {
- unsigned PrevFieldEnd =
+ // SROA currently handles only Arrays and Structs.
+ const StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
- if (PrevFieldEnd < FieldBitOffset)
- return true;
- }
-
- PrevFieldBitOffset = FieldBitOffset;
- }
-
- // Check for tail padding.
- if (unsigned EltCount = STy->getNumElements()) {
- unsigned PrevFieldEnd = PrevFieldBitOffset +
- TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
- if (PrevFieldEnd < SL->getSizeInBits())
+ if (PrevFieldEnd < FieldBitOffset)
return true;
}
+ PrevFieldBitOffset = FieldBitOffset;
}
-
- return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
}
/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
@@ -1742,14 +2192,14 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
- AllocaInfo Info;
-
- isSafeForScalarRepl(AI, AI, 0, Info);
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
if (Info.isUnsafe) {
DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
return false;
}
-
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
@@ -1759,6 +2209,20 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
HasPadding(AI->getAllocatedType(), *TD))
return false;
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+ // the alloca instead of promoting each piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
return true;
}
@@ -1771,7 +2235,7 @@ static bool PointsToConstantGlobal(Value *V) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return GV->isConstant();
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
+ if (CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::GetElementPtr)
return PointsToConstantGlobal(CE->getOperand(0));
return false;
@@ -1782,18 +2246,19 @@ static bool PointsToConstantGlobal(Value *V) {
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
User *U = cast<Instruction>(*UI);
- if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Ignore non-volatile loads, they are always ok.
- if (!LI->isVolatile())
- continue;
-
+ if (LI->isVolatile()) return false;
+ continue;
+ }
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
@@ -1808,27 +2273,52 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
return false;
continue;
}
-
+
+ if (CallSite CS = U) {
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load and we can ignore it.
+ if (CS.onlyReadsMemory())
+ continue;
+
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
// If this is isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
if (MI == 0)
return false;
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
// If we already have seen a copy, reject the second one.
if (TheCopy) return false;
-
+
// If the pointer has been offset from the start of the alloca, we can't
// safely handle this.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
if (UI.getOperandNo() != 0) return false;
-
+
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
return false;
-
+
// Otherwise, the transform is safe. Remember the copy instruction.
TheCopy = MI;
}
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
deleted file mode 100644
index 87c9d63..0000000
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple pass that applies an experimental
-// transformation on calls to specific functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls-halfpowr"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-namespace {
- /// This pass optimizes well half_powr function calls.
- ///
- class SimplifyHalfPowrLibCalls : public FunctionPass {
- const TargetData *TD;
- public:
- static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
- initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
-
- Instruction *
- InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt);
- };
- char SimplifyHalfPowrLibCalls::ID = 0;
-} // end anonymous namespace.
-
-INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false)
-
-// Public interface to the Simplify HalfPowr LibCalls pass.
-FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
- return new SimplifyHalfPowrLibCalls();
-}
-
-/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
-/// their control flow to better facilitate subsequent optimization.
-Instruction *
-SimplifyHalfPowrLibCalls::
-InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt) {
- std::vector<BasicBlock *> Bodies;
- BasicBlock *NewBlock = 0;
-
- for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
- CallInst *Call = cast<CallInst>(HalfPowrs[i]);
- Function *Callee = Call->getCalledFunction();
-
- // Minimally sanity-check the CFG of half_powr to ensure that it contains
- // the kind of code we expect. If we're running this pass, we have
- // reason to believe it will be what we expect.
- Function::iterator I = Callee->begin();
- BasicBlock *Prologue = I++;
- if (I == Callee->end()) break;
- BasicBlock *SubnormalHandling = I++;
- if (I == Callee->end()) break;
- BasicBlock *Body = I++;
- if (I != Callee->end()) break;
- if (SubnormalHandling->getSinglePredecessor() != Prologue)
- break;
- BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
- if (!PBI || !PBI->isConditional())
- break;
- BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
- if (!SNBI || SNBI->isConditional())
- break;
- if (!isa<ReturnInst>(Body->getTerminator()))
- break;
-
- Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
-
- // Inline the call, taking care of what code ends up where.
- NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
-
- InlineFunctionInfo IFI(0, TD);
- bool B = InlineFunction(Call, IFI);
- assert(B && "half_powr didn't inline?"); B=B;
-
- BasicBlock *NewBody = NewBlock->getSinglePredecessor();
- assert(NewBody);
- Bodies.push_back(NewBody);
- }
-
- if (!NewBlock)
- return InsertPt;
-
- // Put the code for all the bodies into one block, to facilitate
- // subsequent optimization.
- (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
- for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
- BasicBlock *Body = Bodies[i];
- Instruction *FNP = Body->getFirstNonPHI();
- // Splice the insts from body into NewBlock.
- NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
- FNP, Body->getTerminator());
- }
-
- return NewBlock->begin();
-}
-
-/// runOnFunction - Top level algorithm.
-///
-bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
-
- bool Changed = false;
- std::vector<Instruction *> HalfPowrs;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- // Look for calls.
- bool IsHalfPowr = false;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Look for direct calls and calls to non-external functions.
- Function *Callee = CI->getCalledFunction();
- if (Callee && Callee->hasExternalLinkage()) {
- // Look for calls with well-known names.
- if (Callee->getName() == "__half_powrf4")
- IsHalfPowr = true;
- }
- }
- if (IsHalfPowr)
- HalfPowrs.push_back(I);
- // We're looking for sequences of up to three such calls, which we'll
- // simplify as a group.
- if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
- I = InlineHalfPowrs(HalfPowrs, I);
- E = I->getParent()->end();
- HalfPowrs.clear();
- Changed = true;
- }
- }
- assert(HalfPowrs.empty() && "Block had no terminator!");
- }
-
- return Changed;
-}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 5b22b23..54eaada 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -25,13 +25,14 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
using namespace llvm;
STATISTIC(NumSimplified, "Number of library calls simplified");
@@ -48,6 +49,7 @@ class LibCallOptimization {
protected:
Function *Caller;
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
LLVMContext* Context;
public:
LibCallOptimization() { }
@@ -61,9 +63,11 @@ public:
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
- Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
this->TD = TD;
+ this->TLI = TLI;
if (CI->getCalledFunction())
Context = &CI->getCalledFunction()->getContext();
@@ -96,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}
+
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
/// comparisons with With.
@@ -123,7 +136,7 @@ struct StrCatOpt : public LibCallOptimization {
// Verify the "strcat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
return 0;
@@ -160,9 +173,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- EmitMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
- 1, false, B, TD);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
}
};
@@ -174,7 +186,7 @@ struct StrNCatOpt : public StrCatOpt {
// Verify the "strncat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!FT->getParamType(2)->isIntegerTy())
@@ -222,7 +234,7 @@ struct StrChrOpt : public LibCallOptimization {
// Verify the "strchr" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
return 0;
@@ -260,8 +272,7 @@ struct StrChrOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
- return B.CreateGEP(SrcStr, Idx, "strchr");
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
}
};
@@ -273,7 +284,7 @@ struct StrRChrOpt : public LibCallOptimization {
// Verify the "strrchr" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
return 0;
@@ -302,8 +313,7 @@ struct StrRChrOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
- return B.CreateGEP(SrcStr, Idx, "strrchr");
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
}
};
@@ -317,7 +327,7 @@ struct StrCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
@@ -365,7 +375,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -420,7 +430,7 @@ struct StrCpyOpt : public LibCallOptimization {
if (FT->getNumParams() != NumParams ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -441,9 +451,8 @@ struct StrCpyOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
CI->getArgOperand(2), B, TD);
else
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
};
@@ -456,7 +465,7 @@ struct StrNCpyOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -471,8 +480,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
- EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
- LenOp, false, B, TD);
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
return Dst;
}
@@ -491,9 +499,8 @@ struct StrNCpyOpt : public LibCallOptimization {
if (Len > SrcLen+1) return 0;
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
@@ -506,7 +513,7 @@ struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
return 0;
@@ -532,7 +539,7 @@ struct StrPBrkOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
FT->getReturnType() != FT->getParamType(0))
return 0;
@@ -552,8 +559,7 @@ struct StrPBrkOpt : public LibCallOptimization {
if (I == std::string::npos) // No match.
return Constant::getNullValue(CI->getType());
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), I);
- return B.CreateGEP(CI->getArgOperand(0), Idx, "strpbrk");
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
@@ -577,7 +583,8 @@ struct StrToOpt : public LibCallOptimization {
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
- CI->setOnlyReadsMemory();
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
CI->addAttribute(1, Attribute::NoCapture);
}
@@ -592,7 +599,7 @@ struct StrSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
return 0;
@@ -621,7 +628,7 @@ struct StrCSpnOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
return 0;
@@ -774,8 +781,8 @@ struct MemCpyOpt : public LibCallOptimization {
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -796,8 +803,8 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -818,9 +825,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1),
- Type::getInt8Ty(*Context), false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -902,12 +908,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
}
if (LdExpArg) {
@@ -926,7 +930,7 @@ struct Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(),
- Type::getInt32Ty(*Context),NULL);
+ B.getInt32Ty(), NULL);
CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -956,7 +960,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
Value *V = Cast->getOperand(0);
V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
Callee->getAttributes());
- return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+ return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -983,8 +987,8 @@ struct FFSOpt : public LibCallOptimization {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->getValue() == 0) // ffs(0) -> 0.
return Constant::getNullValue(CI->getType());
- return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
- CI->getValue().countTrailingZeros()+1);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
@@ -993,11 +997,10 @@ struct FFSOpt : public LibCallOptimization {
Intrinsic::cttz, &ArgType, 1);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
- return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
@@ -1014,10 +1017,8 @@ struct IsDigitOpt : public LibCallOptimization {
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
- "isdigittmp");
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
- "isdigit");
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -1035,8 +1036,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
- "isascii");
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -1054,8 +1054,7 @@ struct AbsOpt : public LibCallOptimization {
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op,
- Constant::getAllOnesValue(Op->getType()),
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
"ispos");
Value *Neg = B.CreateNeg(Op, "neg");
return B.CreateSelect(Pos, Op, Neg);
@@ -1088,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization {
// 'printf' Optimizations
struct PrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
@@ -1106,11 +1099,15 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
- // in case there is an error writing to stdout.
+ // Do not do any of the following transformations if the printf return value
+ // is used, in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
- FormatStr[0]), B, TD);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1141,27 +1138,46 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy() &&
- CI->use_empty()) {
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
// 'sprintf' Optimizations
struct SPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed pointer arguments and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1179,9 +1195,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
- ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
- FormatStr.size() + 1), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1195,13 +1211,11 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getArgOperand(2),
- Type::getInt8Ty(*Context), "char");
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
- "nul");
- B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1217,14 +1231,39 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
- IncLen, 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
@@ -1291,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization {
// 'fprintf' Optimizations
struct FPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed paramters as pointers and integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// All the optimizations depend on the format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1343,6 +1376,60 @@ struct FPrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed paramters as pointers and integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ std::string Str;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
};
} // end anonymous namespace.
@@ -1355,6 +1442,8 @@ namespace {
/// This pass optimizes well known library functions from libc and libm.
///
class SimplifyLibCalls : public FunctionPass {
+ TargetLibraryInfo *TLI;
+
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
@@ -1370,7 +1459,8 @@ namespace {
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
-
+ PutsOpt Puts;
+
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
@@ -1387,14 +1477,20 @@ namespace {
void setDoesNotAlias(Function &F, unsigned n);
bool doInitialization(Module &M);
+ void inferPrototypeAttributes(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
};
- char SimplifyLibCalls::ID = 0;
} // end anonymous namespace.
-INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false)
+char SimplifyLibCalls::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false)
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1426,9 +1522,9 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
- Optimizations["memcpy"] = &MemCpy;
+ if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
Optimizations["memmove"] = &MemMove;
- Optimizations["memset"] = &MemSet;
+ if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
// _chk variants of String and Memory LibCall Optimizations.
Optimizations["__strcpy_chk"] = &StrCpyChk;
@@ -1484,12 +1580,15 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
+ Optimizations["puts"] = &Puts;
}
/// runOnFunction - Top level algorithm.
///
bool SimplifyLibCalls::runOnFunction(Function &F) {
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
if (Optimizations.empty())
InitOptimizations();
@@ -1518,7 +1617,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
Builder.SetInsertPoint(BB, I);
// Try to optimize this call.
- Value *Result = LCO->OptimizeCall(CI, TD, Builder);
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
if (Result == 0) continue;
DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
@@ -1581,688 +1680,654 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
}
}
+
+void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
+ const FunctionType *FTy = F.getFunctionType();
+
+ StringRef Name = F.getName();
+ switch (Name[0]) {
+ case 's':
+ if (Name == "strlen") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strchr" ||
+ Name == "strrchr") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "strcpy" ||
+ Name == "stpcpy" ||
+ Name == "strcat" ||
+ Name == "strtol" ||
+ Name == "strtod" ||
+ Name == "strtof" ||
+ Name == "strtoul" ||
+ Name == "strtoll" ||
+ Name == "strtold" ||
+ Name == "strncat" ||
+ Name == "strncpy" ||
+ Name == "strtoull") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strxfrm") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strcmp" ||
+ Name == "strspn" ||
+ Name == "strncmp" ||
+ Name == "strcspn" ||
+ Name == "strcoll" ||
+ Name == "strcasecmp" ||
+ Name == "strncasecmp") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strstr" ||
+ Name == "strpbrk") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "strtok" ||
+ Name == "strtok_r") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "scanf" ||
+ Name == "setbuf" ||
+ Name == "setvbuf") {
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "strdup" ||
+ Name == "strndup") {
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "stat" ||
+ Name == "sscanf" ||
+ Name == "sprintf" ||
+ Name == "statvfs") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "snprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "setitimer") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "system") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'm':
+ if (Name == "malloc") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "memcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memchr" ||
+ Name == "memrchr") {
+ if (FTy->getNumParams() != 3)
+ return;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ } else if (Name == "modf" ||
+ Name == "modff" ||
+ Name == "modfl" ||
+ Name == "memcpy" ||
+ Name == "memccpy" ||
+ Name == "memmove") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "memalign") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotAlias(F, 0);
+ } else if (Name == "mkdir" ||
+ Name == "mktime") {
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'r':
+ if (Name == "realloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "read") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "rmdir" ||
+ Name == "rewind" ||
+ Name == "remove" ||
+ Name == "realpath") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "rename" ||
+ Name == "readlink") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'w':
+ if (Name == "write") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'b':
+ if (Name == "bcopy") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bcmp") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "bzero") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'c':
+ if (Name == "calloc") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "chmod" ||
+ Name == "chown" ||
+ Name == "ctermid" ||
+ Name == "clearerr" ||
+ Name == "closedir") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'a':
+ if (Name == "atoi" ||
+ Name == "atol" ||
+ Name == "atof" ||
+ Name == "atoll") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "access") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'f':
+ if (Name == "fopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fdopen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "feof" ||
+ Name == "free" ||
+ Name == "fseek" ||
+ Name == "ftell" ||
+ Name == "fgetc" ||
+ Name == "fseeko" ||
+ Name == "ftello" ||
+ Name == "fileno" ||
+ Name == "fflush" ||
+ Name == "fclose" ||
+ Name == "fsetpos" ||
+ Name == "flockfile" ||
+ Name == "funlockfile" ||
+ Name == "ftrylockfile") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "ferror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ } else if (Name == "fputc" ||
+ Name == "fstat" ||
+ Name == "frexp" ||
+ Name == "frexpf" ||
+ Name == "frexpl" ||
+ Name == "fstatvfs") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "fgets") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ } else if (Name == "fread" ||
+ Name == "fwrite") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ } else if (Name == "fputs" ||
+ Name == "fscanf" ||
+ Name == "fprintf" ||
+ Name == "fgetpos") {
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'g':
+ if (Name == "getc" ||
+ Name == "getlogin_r" ||
+ Name == "getc_unlocked") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "getenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "gets" ||
+ Name == "getchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "getitimer") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "getpwnam") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'u':
+ if (Name == "ungetc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "uname" ||
+ Name == "unlink" ||
+ Name == "unsetenv") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "utime" ||
+ Name == "utimes") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 'p':
+ if (Name == "putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "puts" ||
+ Name == "printf" ||
+ Name == "perror") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "pread" ||
+ Name == "pwrite") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ } else if (Name == "putchar") {
+ setDoesNotThrow(F);
+ } else if (Name == "popen") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "pclose") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'v':
+ if (Name == "vscanf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vsscanf" ||
+ Name == "vfscanf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "valloc") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "vprintf") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "vfprintf" ||
+ Name == "vsprintf") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "vsnprintf") {
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ }
+ break;
+ case 'o':
+ if (Name == "open") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ } else if (Name == "opendir") {
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 't':
+ if (Name == "tmpfile") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "times") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'h':
+ if (Name == "htonl" ||
+ Name == "htons") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'n':
+ if (Name == "ntohl" ||
+ Name == "ntohs") {
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ }
+ break;
+ case 'l':
+ if (Name == "lstat") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "lchown") {
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ case 'q':
+ if (Name == "qsort") {
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ }
+ break;
+ case '_':
+ if (Name == "__strdup" ||
+ Name == "__strndup") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "__strtok_r") {
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "_IO_getc") {
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "_IO_putc") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ }
+ break;
+ case 1:
+ if (Name == "\1__isoc99_scanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1stat64" ||
+ Name == "\1lstat64" ||
+ Name == "\1statvfs64" ||
+ Name == "\1__isoc99_sscanf") {
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fopen64") {
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1fseeko64" ||
+ Name == "\1ftello64") {
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ } else if (Name == "\1tmpfile64") {
+ if (!FTy->getReturnType()->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ } else if (Name == "\1fstat64" ||
+ Name == "\1fstatvfs64") {
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ } else if (Name == "\1open64") {
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ }
+ break;
+ }
+}
+
/// doInitialization - Add attributes to well-known functions.
///
bool SimplifyLibCalls::doInitialization(Module &M) {
Modified = false;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
- if (!F.isDeclaration())
- continue;
-
- if (!F.hasName())
- continue;
-
- const FunctionType *FTy = F.getFunctionType();
-
- StringRef Name = F.getName();
- switch (Name[0]) {
- case 's':
- if (Name == "strlen") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strchr" ||
- Name == "strrchr") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "strcpy" ||
- Name == "stpcpy" ||
- Name == "strcat" ||
- Name == "strtol" ||
- Name == "strtod" ||
- Name == "strtof" ||
- Name == "strtoul" ||
- Name == "strtoll" ||
- Name == "strtold" ||
- Name == "strncat" ||
- Name == "strncpy" ||
- Name == "strtoull") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strxfrm") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strcmp" ||
- Name == "strspn" ||
- Name == "strncmp" ||
- Name == "strcspn" ||
- Name == "strcoll" ||
- Name == "strcasecmp" ||
- Name == "strncasecmp") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strstr" ||
- Name == "strpbrk") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strtok" ||
- Name == "strtok_r") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "scanf" ||
- Name == "setbuf" ||
- Name == "setvbuf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strdup" ||
- Name == "strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "stat" ||
- Name == "sscanf" ||
- Name == "sprintf" ||
- Name == "statvfs") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "snprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- } else if (Name == "setitimer") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- } else if (Name == "system") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- case 'm':
- if (Name == "malloc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "memcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "memchr" ||
- Name == "memrchr") {
- if (FTy->getNumParams() != 3)
- continue;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "modf" ||
- Name == "modff" ||
- Name == "modfl" ||
- Name == "memcpy" ||
- Name == "memccpy" ||
- Name == "memmove") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "memalign") {
- if (!FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotAlias(F, 0);
- } else if (Name == "mkdir" ||
- Name == "mktime") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'r':
- if (Name == "realloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "read") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- } else if (Name == "rmdir" ||
- Name == "rewind" ||
- Name == "remove" ||
- Name == "realpath") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "rename" ||
- Name == "readlink") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'w':
- if (Name == "write") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- }
- break;
- case 'b':
- if (Name == "bcopy") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bzero") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'c':
- if (Name == "calloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "chmod" ||
- Name == "chown" ||
- Name == "ctermid" ||
- Name == "clearerr" ||
- Name == "closedir") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'a':
- if (Name == "atoi" ||
- Name == "atol" ||
- Name == "atof" ||
- Name == "atoll") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "access") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'f':
- if (Name == "fopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "fdopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- } else if (Name == "feof" ||
- Name == "free" ||
- Name == "fseek" ||
- Name == "ftell" ||
- Name == "fgetc" ||
- Name == "fseeko" ||
- Name == "ftello" ||
- Name == "fileno" ||
- Name == "fflush" ||
- Name == "fclose" ||
- Name == "fsetpos" ||
- Name == "flockfile" ||
- Name == "funlockfile" ||
- Name == "ftrylockfile") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "ferror") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- } else if (Name == "fputc" ||
- Name == "fstat" ||
- Name == "frexp" ||
- Name == "frexpf" ||
- Name == "frexpl" ||
- Name == "fstatvfs") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "fgets") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- } else if (Name == "fread" ||
- Name == "fwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- } else if (Name == "fputs" ||
- Name == "fscanf" ||
- Name == "fprintf" ||
- Name == "fgetpos") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'g':
- if (Name == "getc" ||
- Name == "getlogin_r" ||
- Name == "getc_unlocked") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "getenv") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "gets" ||
- Name == "getchar") {
- setDoesNotThrow(F);
- } else if (Name == "getitimer") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "getpwnam") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'u':
- if (Name == "ungetc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "uname" ||
- Name == "unlink" ||
- Name == "unsetenv") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "utime" ||
- Name == "utimes") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'p':
- if (Name == "putc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "puts" ||
- Name == "printf" ||
- Name == "perror") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "pread" ||
- Name == "pwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- // May throw; these are valid pthread cancellation points.
- setDoesNotCapture(F, 2);
- } else if (Name == "putchar") {
- setDoesNotThrow(F);
- } else if (Name == "popen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "pclose") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'v':
- if (Name == "vscanf") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vsscanf" ||
- Name == "vfscanf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "valloc") {
- if (!FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "vprintf") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vfprintf" ||
- Name == "vsprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "vsnprintf") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- }
- break;
- case 'o':
- if (Name == "open") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- } else if (Name == "opendir") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- }
- break;
- case 't':
- if (Name == "tmpfile") {
- if (!FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "times") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'h':
- if (Name == "htonl" ||
- Name == "htons") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'n':
- if (Name == "ntohl" ||
- Name == "ntohs") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'l':
- if (Name == "lstat") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "lchown") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'q':
- if (Name == "qsort") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(3)->isPointerTy())
- continue;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- }
- break;
- case '_':
- if (Name == "__strdup" ||
- Name == "__strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "__strtok_r") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "_IO_getc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "_IO_putc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- }
- break;
- case 1:
- if (Name == "\1__isoc99_scanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1stat64" ||
- Name == "\1lstat64" ||
- Name == "\1statvfs64" ||
- Name == "\1__isoc99_sscanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fopen64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fseeko64" ||
- Name == "\1ftello64") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1tmpfile64") {
- if (!FTy->getReturnType()->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "\1fstat64" ||
- Name == "\1fstatvfs64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(1)->isPointerTy())
- continue;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1open64") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy())
- continue;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- }
+ if (F.isDeclaration() && F.hasName())
+ inferPrototypeAttributes(F);
}
return Modified;
}
@@ -2298,9 +2363,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
-// puts:
-// * puts("") -> putchar('\n')
-//
// round, roundf, roundl:
// * round(cnst) -> cnst'
//
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index e954c15..9dd83c0 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -26,14 +26,14 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <map>
using namespace llvm;
@@ -362,8 +362,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
+ else if (Value *V = SimplifyInstruction(Inst)) {
+ Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
}
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f41852a..5b6bc04 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,21 +52,28 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
@@ -79,6 +86,18 @@ namespace {
virtual bool runOnFunction(Function &F);
private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVector<PHINode*, 8> &ArgumentPHIs,
@@ -135,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
-
bool FunctionContainsEscapingAllocas = false;
// CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
@@ -162,10 +180,17 @@ bool TailCallElim::runOnFunction(Function &F) {
return false;
// Second pass, change any tail calls to loops.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
- MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
@@ -177,7 +202,7 @@ bool TailCallElim::runOnFunction(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = PN->hasConstantValue()) {
+ if (Value *PNV = SimplifyInstruction(PN)) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -324,41 +349,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
}
-bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
- BasicBlock *BB = Ret->getParent();
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
Function *F = BB->getParent();
- if (&BB->front() == Ret) // Make sure there is something before the ret...
- return false;
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI;
- BasicBlock::iterator BBI = Ret;
- while (1) {
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
break;
if (BBI == BB->begin())
- return false; // Didn't find a potential tail call.
+ return 0; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return false;
+ return 0;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- &BB->front() == CI && &*++BB->begin() == Ret &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
callIsSmall(F)) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -369,9 +400,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return false;
+ return 0;
}
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -389,7 +428,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
@@ -426,6 +466,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
return false;
}
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
if (OldEntry == 0) {
@@ -535,3 +578,53 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
++NumEliminated;
return true;
}
+
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}