diff options
Diffstat (limited to 'lib/Transforms/Utils')
21 files changed, 665 insertions, 141 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 8e5a1eb..d831452 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, // Check to see if this value is already used in the memory instruction's // block. If so, it's already live into the block at the very least, so we // can reasonably fold it. - BasicBlock *MemBB = MemoryInst->getParent(); - for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end(); - UI != E; ++UI) - // We know that uses of arguments and instructions have to be instructions. - if (cast<Instruction>(*UI)->getParent() == MemBB) - return true; - - return false; + return Val->isUsedInBasicBlock(MemoryInst->getParent()); } diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index d41096f..e7334c0 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -16,10 +16,12 @@ transforms_utils_SRC_FILES := \ Local.cpp \ LoopSimplify.cpp \ LoopUnroll.cpp \ + LoopUnrollRuntime.cpp \ LowerExpectIntrinsic.cpp \ LowerInvoke.cpp \ LowerSwitch.cpp \ Mem2Reg.cpp \ + ModuleUtils.cpp \ PromoteMemoryToRegister.cpp \ SSAUpdater.cpp \ SimplifyCFG.cpp \ diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index a7f9efd..ef4a473 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { + ArrayRef<BasicBlock*> Preds, + const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. @@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (NumPreds == 0) { + if (Preds.size() == 0) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); @@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), - P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, - P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); return NewBB; } diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index 23a30cc..50c91b6 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -131,8 +131,8 @@ void BasicInlinerImpl::inlineFunctions() { // Inline InlineFunctionInfo IFI(0, TD); if (InlineFunction(CS, IFI)) { - if (Callee->use_empty() && (Callee->hasLocalLinkage() || - Callee->hasAvailableExternallyLinkage())) + Callee->removeDeadConstantUsers(); + if (Callee->isDefTriviallyDead()) DeadFunctions.insert(Callee); Changed = true; CallSites.erase(CallSites.begin() + index); diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c052910..f752d79 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), - "split", P); + SplitBlockPredecessors(Exit, Preds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 4b5f45b..a808303 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -15,11 +15,15 @@ #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include "llvm/Intrinsics.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// 'floor'). This function is known to take a single of type matching 'Op' and /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, const AttrListPtr &Attrs) { - char NameBuffer[20]; +Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, + const AttrListPtr &Attrs) { + SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. - unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); + NameBuffer += Name; if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf + NameBuffer += 'f'; // floorf else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; + NameBuffer += 'l'; // floorl Name = NameBuffer; } @@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); else - F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7adc5f1..d96f59c 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -14,10 +14,12 @@ add_llvm_library(LLVMTransformUtils Local.cpp LoopSimplify.cpp LoopUnroll.cpp + LoopUnrollRuntime.cpp LowerExpectIntrinsic.cpp LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + ModuleUtils.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp @@ -27,11 +29,3 @@ add_llvm_library(LLVMTransformUtils Utils.cpp ValueMapper.cpp ) - -add_llvm_library_dependencies(LLVMTransformUtils - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMipa - ) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index cf21f1e..c6dfe73 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -113,8 +113,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - VMap[&BB] = CBB; // Add basic block mapping. + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) Returns.push_back(RI); } @@ -224,6 +239,22 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc), + const_cast<BasicBlock*>(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; // Loop over all instructions, and copy them over, DCE'ing as we go. This diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 8cc2649..3ef6b01 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -97,8 +97,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, InsertPt = II.getNormalDest()->begin(); } - for (; isa<PHINode>(InsertPt); ++InsertPt) - /* empty */; // Don't insert before any PHI nodes. + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before any PHI nodes or landingpad instrs. new StoreInst(&I, Slot, InsertPt); return Slot; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5464dbc..f89e1b1 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } - // Find the personality function used by the landing pads of the caller. If it - // exists, then check to see that it matches the personality function used in - // the callee. - for (Function::const_iterator - I = Caller->begin(), E = Caller->end(); I != E; ++I) + // Get the personality function from the callee if it contains a landing pad. + Value *CalleePersonality = 0; + for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); + I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'isa' here should become go away once the new EH system is - // in place. - if (!isa<LandingPadInst>(BB->getFirstNonPHI())) - continue; - const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); - const Value *CallerPersFn = LP->getPersonalityFn(); - - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. - for (Function::const_iterator - I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) - if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { - const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once - // the new EH system is in place. - if (const LandingPadInst *LP = - dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) - if (CallerPersFn != LP->getPersonalityFn()) - return false; - break; - } - + // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once + // the new EH system is in place. + if (const LandingPadInst *LP = + dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) + CalleePersonality = LP->getPersonalityFn(); break; } + // Find the personality function used by the landing pads of the caller. If it + // exists, then check to see that it matches the personality function used in + // the callee. + if (CalleePersonality) + for (Function::const_iterator I = Caller->begin(), E = Caller->end(); + I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { + const BasicBlock *BB = II->getUnwindDest(); + // FIXME: This 'isa' here should become go away once the new EH system + // is in place. + if (!isa<LandingPadInst>(BB->getFirstNonPHI())) + continue; + const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + if (LP->getPersonalityFn() != CalleePersonality) + return false; + + break; + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // @@ -987,7 +991,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt new file mode 100644 index 0000000..88b2ffe --- /dev/null +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Transforms/Utils/LLVMBuild.txt ---------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TransformUtils +parent = Transforms +required_libraries = Analysis Core IPA Support Target diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 7034feb..4dd93cf 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" @@ -257,6 +258,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { II->getIntrinsicID() == Intrinsic::lifetime_end) return isa<UndefValue>(II->getArgOperand(1)); } + + if (extractMallocCall(I)) return true; + + if (CallInst *CI = isFreeCall(I)) + if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) + return C->isNullValue() || isa<UndefValue>(C); + return false; } @@ -486,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { if (Succ->getSinglePredecessor()) return true; // Make a list of the predecessors of BB - typedef SmallPtrSet<BasicBlock*, 16> BlockSet; - BlockSet BBPreds(pred_begin(BB), pred_end(BB)); - - // Use that list to make another list of common predecessors of BB and Succ - BlockSet CommonPreds; - for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (BBPreds.count(P)) - CommonPreds.insert(P); - } + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - // Shortcut, if there are no common predecessors, merging is always safe - if (CommonPreds.empty()) - return true; - // Look at all the phi nodes in Succ, to see if they present a conflict when // merging these blocks for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { @@ -512,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // merge the phi nodes and then the blocks can still be merged PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); if (BBPN && BBPN->getParent() == BB) { - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - if (BBPN->getIncomingValueForBlock(*PI) - != PN->getIncomingValueForBlock(*PI)) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " - << (*PI)->getName() << "\n"); + << IBB->getName() << "\n"); return false; } } } else { Value* Val = PN->getIncomingValueForBlock(BB); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { // See if the incoming value for the common predecessor is equal to the // one for BB, in which case this phi node will not prevent the merging // of the block. - if (Val != PN->getIncomingValueForBlock(*PI)) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getName() << "\n"); + << "predecessor " << IBB->getName() << "\n"); return false; } } @@ -740,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // If there is a large requested alignment and we can, bump up the alignment // of the global. if (GV->isDeclaration()) return Align; + // If the memory we set aside for the global may not be the memory used by + // the final program then it is impossible for us to reliably enforce the + // preferred alignment. + if (GV->isWeakForLinker()) return Align; if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index cbd54a8..4376265 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -99,7 +99,8 @@ namespace { bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); BasicBlock *InsertPreheaderForLoop(Loop *L); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, @@ -240,7 +241,7 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM)) { + if (SeparateNestedLoop(L, LPM, Preheader)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. Changed = true; @@ -265,7 +266,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { } // Split out the loop pre-header. - BasicBlock *NewBB = - SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), - ".preheader", this); + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", this, NewBBs); + PreheaderBB = NewBBs[0]; + } - NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() - << "\n"); + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - return NewBB; + return PreheaderBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { this, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader) { + // Don't try to separate loops without a preheader (this excludes + // loop headers which are targeted by an indirectbr). + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!L->getHeader()->isLandingPad() && + "Can't insert backedge to landing pad"); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. @@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SmallVector<BasicBlock*, 8> OuterLoopPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirectbr edges. - if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) - return 0; - + !L->contains(PN->getIncomingBlock(i))) OuterLoopPreds.push_back(PN->getIncomingBlock(i)); - } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); @@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SE->forgetLoop(L); BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], - OuterLoopPreds.size(), - ".outer", this); + BasicBlock *NewBB = + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { if (!Preheader) return 0; + // The header is not a landing pad; preheader insertion should ensure this. + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 62e4fa2..b96f14b 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { + bool AllowRuntime, unsigned TripMultiple, + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return false; } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); - if (SE) - SE->forgetLoop(L); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + + if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + return false; + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) + SE->forgetLoop(L); + // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { @@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } else if (RuntimeTripCount) { + DEBUG(dbgs() << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BasicBlock *Dest = Headers[j]; bool NeedConditional = true; + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 0000000..b351852 --- /dev/null +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,374 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. +// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations. Other strategies +// include generate a loop before or after the unrolled loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/BasicBlock.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <algorithm> + +using namespace llvm; + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, + BasicBlock *LastPrologBB, BasicBlock *PrologEnd, + BasicBlock *OrigPH, BasicBlock *NewPH, + ValueToValueMapTy &LVMap, Pass *P) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch != 0 && "Loop must have a latch"); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI name is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); + SBI != SBE; ++SBI) { + for (BasicBlock::iterator BBI = (*SBI)->begin(); + PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { + + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", + PrologEnd->getTerminator()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(PN)) { + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); + } else { + NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); + } + Value *OrigVal = PN->getIncomingValueForBlock(Latch); + Value *V = OrigVal; + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = LVMap[I]; + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, LastPrologBB); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. + if (L->contains(PN)) { + PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); + } else { + PN->addIncoming(NewPN, PrologEnd); + } + } + } + + // Create a branch around the orignal loop, which is taken if the + // trip count is less than the unroll factor. + Instruction *InsertPt = PrologEnd->getTerminator(); + Instruction *BrLoopExit = + new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, + ConstantInt::get(TripCount->getType(), Count)); + BasicBlock *Exit = L->getUniqueExitBlock(); + assert(Exit != 0 && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); + if (!Exit->isLandingPad()) { + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", + P, NewBBs); + } + // Add the branch to the exit block (around the unrolled loop) + BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); + InsertPt->eraseFromParent(); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// This function doesn't create a clone of the loop structure. +/// +/// There are two value maps that are defined and used. VMap is +/// for the values in the current loop instance. LVMap contains +/// the values from the last loop instance. We need the LVMap values +/// to update the inital values for the current loop instance. +/// +static void CloneLoopBlocks(Loop *L, + bool FirstCopy, + BasicBlock *InsertTop, + BasicBlock *InsertBot, + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, + ValueToValueMapTy &LVMap, + LoopInfo *LI) { + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); + NewBlocks.push_back(NewBB); + + if (Loop *ParentLoop = L->getParentLoop()) + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block + InsertTop->getTerminator()->setSuccessor(0, NewBB); + + // Change the incoming values to the ones defined in the + // previously cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[I]); + if (FirstCopy) { + // We replace the first phi node with the value from the preheader + VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + NewBB->getInstList().erase(NewPHI); + } else { + // Update VMap with values from the previous block + unsigned idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + if (Instruction *I = dyn_cast<Instruction>(InVal)) + if (L->contains(I)) + InVal = LVMap[InVal]; + NewPHI->setIncomingValue(idx, InVal); + NewPHI->setIncomingBlock(idx, InsertTop); + } + } + } + + if (Latch == *BB) { + VMap.erase((*BB)->getTerminator()); + NewBB->getTerminator()->eraseFromParent(); + BranchInst::Create(InsertBot, NewBB); + } + } + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) { + LVMap[VI->first] = VI->second; + } +} + +/// Insert code in the prolog code when unrolling a loop with a +/// run-time trip-count. +/// +/// This method assumes that the loop unroll factor is total number +/// of loop bodes in the loop after unrolling. (Some folks refer +/// to the unroll factor as the number of *extra* copies added). +/// We assume also that the loop unroll factor is a power-of-two. So, after +/// unrolling the loop, the number of loop bodies executed is 2, +/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch +/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for +/// the switch instruction is generated. +/// +/// extraiters = tripcount % loopfactor +/// if (extraiters == 0) jump Loop: +/// if (extraiters == loopfactor) jump L1 +/// if (extraiters == loopfactor-1) jump L2 +/// ... +/// L1: LoopBody; +/// L2: LoopBody; +/// ... +/// if tripcount < loopfactor jump End +/// Loop: +/// ... +/// End: +/// +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, + LPPassManager *LPM) { + // for now, only unroll loops that contain a single exit + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() > 1) + return false; + + // Make sure the loop is in canonical form, and there is a single + // exit block only. + if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + return false; + + // Use Scalar Evolution to compute the trip count. This allows more + // loops to be unrolled than relying on induction var simplification + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE == 0) + return false; + + // Only unroll loops with a computable trip count and the trip count needs + // to be an int value (allowing a pointer type is a TODO item) + const SCEV *BECount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) + return false; + + // Add 1 since the backedge count doesn't include the first loop iteration + const SCEV *TripCountSC = + SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); + if (isa<SCEVCouldNotCompute>(TripCountSC)) + return false; + + // We only handle cases when the unroll factor is a power of 2. + // Count is the loop unroll factor, the number of extra copies added + 1. + if ((Count & (Count-1)) != 0) + return false; + + // If this loop is nested, then the loop unroller changes the code in + // parent loop, so the Scalar Evolution pass needs to be run again + if (Loop *ParentLoop = L->getParentLoop()) + SE->forgetLoop(ParentLoop); + + BasicBlock *PH = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + // It helps to splits the original preheader twice, one for the end of the + // prolog code and one for a new loop preheader + BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); + BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); + BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + + // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % (loop unroll factor + 1) + SCEVExpander Expander(*SE, "loop-unroll"); + Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), + PreHeaderBR); + Type *CountTy = TripCount->getType(); + BinaryOperator *ModVal = + BinaryOperator::CreateURem(TripCount, + ConstantInt::get(CountTy, Count), + "xtraiter"); + ModVal->insertBefore(PreHeaderBR); + + // Check if for no extra iterations, then jump to unrolled loop + Value *BranchVal = new ICmpInst(PreHeaderBR, + ICmpInst::ICMP_NE, ModVal, + ConstantInt::get(CountTy, 0), "lcmp"); + // Branch to either the extra iterations or the unrolled loop + // We will fix up the true branch label when adding loop body copies + BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); + assert(PreHeaderBR->isUnconditional() && + PreHeaderBR->getSuccessor(0) == PEnd && + "CFG edges in Preheader are not correct"); + PreHeaderBR->eraseFromParent(); + + ValueToValueMapTy LVMap; + Function *F = Header->getParent(); + // These variables are used to update the CFG links in each iteration + BasicBlock *CompareBB = 0; + BasicBlock *LastLoopBB = PH; + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { + std::vector<BasicBlock*> NewBlocks; + ValueToValueMapTy VMap; + + // Clone all the basic blocks in the loop, but we don't clone the loop + // This function adds the appropriate CFG connections. + CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, + LoopBlocks, VMap, LVMap, LI); + LastLoopBB = cast<BasicBlock>(VMap[Latch]); + + // Insert the cloned blocks into function just before the original loop + F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Generate the code for the comparison which determines if the loop + // prolog code needs to be executed. + if (leftOverIters == Count-1) { + // There is no compare block for the fall-thru case when for the last + // left over iteration + CompareBB = NewBlocks[0]; + } else { + // Create a new block for the comparison + BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", + F, CompareBB); + if (Loop *ParentLoop = L->getParentLoop()) { + // Add the new block to the parent loop, if needed + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } + + // The comparison w/ the extra iteration value and branch + Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, + ConstantInt::get(CountTy, leftOverIters), + "un.tmp"); + // Branch to either the extra iterations or the unrolled loop + BranchInst::Create(NewBlocks[0], CompareBB, + BranchVal, NewBB); + CompareBB = NewBB; + PH->getTerminator()->setSuccessor(0, NewBB); + VMap[NewPH] = CompareBB; + } + + // Rewrite the cloned instruction operands to use the values + // created when the clone is created. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) { + RemapInstruction(I, VMap, + RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + } + } + } + + // Connect the prolog code to the original loop and update the + // PHI functions. + ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, + LPM->getAsPass()); + NumRuntimeUnrolled++; + return true; +} diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 0000000..8491c55 --- /dev/null +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,64 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +static void appendToGlobalArray(const char *Array, + Module &M, Function *F, int Priority) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + StructType *Ty = StructType::get( + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); + + Constant *RuntimeCtorInit = ConstantStruct::get( + Ty, IRB.getInt32(Priority), F, NULL); + + // Get the current set of static global constructors and add the new ctor + // to the list. + SmallVector<Constant *, 16> CurrentCtors; + if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); + } + GVCtor->eraseFromParent(); + } + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), + CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, Array); +} + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_ctors", M, F, Priority); +} + +void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) { + appendToGlobalArray("llvm.global_dtors", M, F, Priority); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index db3e942..e8f4285 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -590,7 +590,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index fa8061c..e60a41b 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { User->eraseFromParent(); } } + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) + const { + return std::find(Insts.begin(), Insts.end(), I) != Insts.end(); +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b8c3ab4..bf2cb49 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; unsigned Cost = 0; @@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *BonusInst = 0; if (&*FrontIt != Cond && FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && - FrontIt->isSafeToSpeculativelyExecute()) { + isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 76289c0..6732a78 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -107,8 +107,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 - if (IVOperand->getNumOperands() != 2 || - !isa<ConstantInt>(IVOperand->getOperand(1))) + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) return 0; IVSrc = IVOperand->getOperand(0); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index ac005f9..81eb9e0 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,12 +40,14 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) { const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -60,7 +63,7 @@ namespace { continue; // Don't waste time simplifying unused instructions. if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) @@ -84,8 +87,11 @@ namespace { } char InstSimplifier::ID = 0; -INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions", - false, false) +INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; // Public interface to the simplify instructions pass. |
