aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/Android.mk2
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp15
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp4
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp34
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt10
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp33
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp4
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp64
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Utils/Local.cpp46
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp64
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp64
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp2
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp7
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
21 files changed, 665 insertions, 141 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb..d831452 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index d41096f..e7334c0 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -16,10 +16,12 @@ transforms_utils_SRC_FILES := \
Local.cpp \
LoopSimplify.cpp \
LoopUnroll.cpp \
+ LoopUnrollRuntime.cpp \
LowerExpectIntrinsic.cpp \
LowerInvoke.cpp \
LowerSwitch.cpp \
Mem2Reg.cpp \
+ ModuleUtils.cpp \
PromoteMemoryToRegister.cpp \
SSAUpdater.cpp \
SimplifyCFG.cpp \
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd..ef4a473 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index 23a30cc..50c91b6 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -131,8 +131,8 @@ void BasicInlinerImpl::inlineFunctions() {
// Inline
InlineFunctionInfo IFI(0, TD);
if (InlineFunction(CS, IFI)) {
- if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
- Callee->hasAvailableExternallyLinkage()))
+ Callee->removeDeadConstantUsers();
+ if (Callee->isDefTriviallyDead())
DeadFunctions.insert(Callee);
Changed = true;
CallSites.erase(CallSites.begin() + index);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c052910..f752d79 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 4b5f45b..a808303 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -15,11 +15,15 @@
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
/// 'floor'). This function is known to take a single of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
- IRBuilder<> &B, const AttrListPtr &Attrs) {
- char NameBuffer[20];
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttrListPtr &Attrs) {
+ SmallString<20> NameBuffer;
if (!Op->getType()->isDoubleTy()) {
// If we need to add a suffix, copy into NameBuffer.
- unsigned NameLen = strlen(Name);
- assert(NameLen < sizeof(NameBuffer)-2);
- memcpy(NameBuffer, Name, NameLen);
+ NameBuffer += Name;
if (Op->getType()->isFloatTy())
- NameBuffer[NameLen] = 'f'; // floorf
+ NameBuffer += 'f'; // floorf
else
- NameBuffer[NameLen] = 'l'; // floorl
- NameBuffer[NameLen+1] = 0;
+ NameBuffer += 'l'; // floorl
Name = NameBuffer;
}
@@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
@@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 7adc5f1..d96f59c 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -14,10 +14,12 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
+ ModuleUtils.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
@@ -27,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index cf21f1e..c6dfe73 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -113,8 +113,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
- VMap[&BB] = CBB; // Add basic block mapping.
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
@@ -224,6 +239,22 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
+
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
// Loop over all instructions, and copy them over, DCE'ing as we go. This
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8cc2649..3ef6b01 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -97,8 +97,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
InsertPt = II.getNormalDest()->begin();
}
- for (; isa<PHINode>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before any PHI nodes.
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before any PHI nodes or landingpad instrs.
new StoreInst(&I, Slot, InsertPt);
return Slot;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5464dbc..f89e1b1 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality)
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should become go away once the new EH system
+ // is in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
@@ -987,7 +991,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
new file mode 100644
index 0000000..88b2ffe
--- /dev/null
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/Utils/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TransformUtils
+parent = Transforms
+required_libraries = Analysis Core IPA Support Target
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 7034feb..4dd93cf 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -257,6 +258,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
II->getIntrinsicID() == Intrinsic::lifetime_end)
return isa<UndefValue>(II->getArgOperand(1));
}
+
+ if (extractMallocCall(I)) return true;
+
+ if (CallInst *CI = isFreeCall(I))
+ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
+ return C->isNullValue() || isa<UndefValue>(C);
+
return false;
}
@@ -486,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -512,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -740,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8..4376265 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader (this excludes
+ // loop headers which are targeted by an indirectbr).
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SmallVector<BasicBlock*, 8> OuterLoopPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
-
+ !L->contains(PN->getIncomingBlock(i)))
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
- }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
@@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa2..b96f14b 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000..b351852
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generate a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI name is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+ Value *OrigVal = PN->getIncomingValueForBlock(Latch);
+ Value *V = OrigVal;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the orignal loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the inital values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is total number
+/// of loop bodes in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 1)
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction var simplification
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count and the trip count needs
+ // to be an int value (allowing a pointer type is a TODO item)
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in
+ // parent loop, so the Scalar Evolution pass needs to be run again
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to splits the original preheader twice, one for the end of the
+ // prolog code and one for a new loop preheader
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % (loop unroll factor + 1)
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // Check if for no extra iterations, then jump to unrolled loop
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but we don't clone the loop
+ // This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into function just before the original loop
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-thru case when for the last
+ // left over iteration
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // The comparison w/ the extra iteration value and branch
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the clone is created.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 0000000..8491c55
--- /dev/null
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,64 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+ StructType *Ty = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
+
+ Constant *RuntimeCtorInit = ConstantStruct::get(
+ Ty, IRB.getInt32(Priority), F, NULL);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ }
+ GVCtor->eraseFromParent();
+ }
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
+ CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fa8061c..e60a41b 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
User->eraseFromParent();
}
}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts)
+ const {
+ return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4..bf2cb49 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
unsigned Cost = 0;
@@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 76289c0..6732a78 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -107,8 +107,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
- if (IVOperand->getNumOperands() != 2 ||
- !isa<ConstantInt>(IVOperand->getOperand(1)))
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
return 0;
IVSrc = IVOperand->getOperand(0);
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f9..81eb9e0 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.