diff options
-rw-r--r-- | lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 244 | ||||
-rw-r--r-- | test/Transforms/Mem2Reg/ignore-lifetime.ll | 26 | ||||
-rw-r--r-- | test/Transforms/Mem2Reg/use-analysis.ll | 70 |
3 files changed, 227 insertions, 113 deletions
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 1b51255..7afca82 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -45,6 +46,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -56,56 +58,13 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { - // FIXME: If the memory unit is of pointer or integer type, we can permit - // assignments to subsections of the memory unit. - - // Only allow direct and non-volatile loads and stores... - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { // Loop over all of the uses of the alloca - const User *U = *UI; - if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Note that atomic loads can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (LI->isVolatile()) - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. - // Note that atomic stores can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (SI->isVolatile()) - return false; - } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - return false; - } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) - return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) - return false; - if (!GEPI->hasAllZeroIndices()) - return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) - return false; - } else { - return false; - } - } - - return true; -} - namespace { -struct AllocaInfo { +struct AllocaInfo : private InstVisitor<AllocaInfo, bool> { SmallVector<BasicBlock *, 32> DefiningBlocks; SmallVector<BasicBlock *, 32> UsingBlocks; + Type *AllocaTy; StoreInst *OnlyStore; BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; @@ -116,6 +75,7 @@ struct AllocaInfo { void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); + AllocaTy = 0; OnlyStore = 0; OnlyBlock = 0; OnlyUsedInOneBlock = true; @@ -125,39 +85,107 @@ struct AllocaInfo { /// Scan the uses of the specified alloca, filling in the AllocaInfo used /// by the rest of the pass to reason about the uses of this alloca. - void AnalyzeAlloca(AllocaInst *AI) { + bool analyzeAlloca(AllocaInst &AI) { clear(); - // As we scan the uses of the alloca instruction, keep track of stores, - // and decide whether all of the loads and stores to the alloca are within - // the same basic block. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI->getParent()); - AllocaPointerVal = SI->getOperand(0); - OnlyStore = SI; - } else { - LoadInst *LI = cast<LoadInst>(User); - // Otherwise it must be a load instruction, keep track of variable - // reads. - UsingBlocks.push_back(LI->getParent()); - AllocaPointerVal = LI; - } + AllocaTy = AI.getAllocatedType(); + enqueueUsers(AI); + + // Walk queued up uses in the worklist to handle nested uses. + while (!UseWorklist.empty()) { + U = UseWorklist.pop_back_val(); + Instruction &I = *cast<Instruction>(U->getUser()); + if (!visit(I)) + return false; // Propagate failure to promote up. if (OnlyUsedInOneBlock) { if (OnlyBlock == 0) - OnlyBlock = User->getParent(); - else if (OnlyBlock != User->getParent()) + OnlyBlock = I.getParent(); + else if (OnlyBlock != I.getParent()) OnlyUsedInOneBlock = false; } } - DbgDeclare = FindAllocaDbgDeclare(AI); + DbgDeclare = FindAllocaDbgDeclare(&AI); + return true; + } + +private: + // Befriend the base class so it can call through private visitor methods. + friend class InstVisitor<AllocaInfo, bool>; + + /// \brief A use pointer that is non-null when visiting uses. + Use *U; + + /// \brief A worklist for recursively visiting all uses of an alloca. + SmallVector<Use *, 8> UseWorklist; + + /// \brief A set for preventing cyclic visitation. + SmallPtrSet<Use *, 8> VisitedUses; + + void enqueueUsers(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; + ++UI) + if (VisitedUses.insert(&UI.getUse())) + UseWorklist.push_back(&UI.getUse()); + } + + bool visitLoadInst(LoadInst &LI) { + if (LI.isVolatile() || LI.getType() != AllocaTy) + return false; + + // Keep track of variable reads. + UsingBlocks.push_back(LI.getParent()); + AllocaPointerVal = &LI; + return true; + } + + bool visitStoreInst(StoreInst &SI) { + if (SI.isVolatile() || SI.getValueOperand() == U->get() || + SI.getValueOperand()->getType() != AllocaTy) + return false; + + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI.getParent()); + AllocaPointerVal = SI.getOperand(0); + OnlyStore = &SI; + return true; + } + + bool visitBitCastInst(BitCastInst &BC) { + enqueueUsers(BC); + return true; } + + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { + if (GEPI.use_empty()) + return true; + + enqueueUsers(GEPI); + + return GEPI.hasAllZeroIndices(); + } + + // We can promote through debug info intrinsics as they don't alter the + // value stored in memory. + bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { return true; } + + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + default: + return false; + + // Lifetime intrinsics don't preclude promoting the memory to a register. + // FIXME: We should use these to promote to undef when outside of a valid + // lifetime. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + + // The fallback is that the alloca cannot be promoted. + bool visitInstruction(Instruction &I) { return false; } }; // Data package used by RenamePass() @@ -316,24 +344,56 @@ private: static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. + // FIXME: This function isn't actually specific to lifetime instrinsics. It + // also is redundant with the actual analysis of the loads and stores and + // should be folded into that. + + SmallVector<Instruction *, 8> Worklist; + SmallPtrSet<Instruction *, 8> Visited; + SmallVector<Instruction *, 8> Dead; + Worklist.push_back(AI); + + do { + Instruction *I = Worklist.pop_back_val(); - for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE;) { - Instruction *I = cast<Instruction>(*UI); - ++UI; - if (isa<LoadInst>(I) || isa<StoreInst>(I)) + if (I->use_empty()) { + Dead.push_back(I); continue; + } - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE;) { - Instruction *Inst = cast<Instruction>(*UI); - ++UI; - Inst->eraseFromParent(); - } + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (!isa<LoadInst>(*UI) && !isa<StoreInst>(*UI) && + Visited.insert(cast<Instruction>(*UI))) + Worklist.push_back(cast<Instruction>(*UI)); + } while (!Worklist.empty()); + + while (!Dead.empty()) { + Instruction *I = Dead.pop_back_val(); + + // Don't delete the alloca itself. + if (I == AI) + continue; + + // Note that we open code the deletion algorithm here because we know + // apriori that all of the instructions using an alloca that reaches here + // are trivially dead when their use list becomes empty (The only risk are + // lifetime markers which we specifically want to nuke). By coding it here + // we can skip the triviality test and be more efficient. + // + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; + ++OI) { + Instruction *Op = dyn_cast<Instruction>(*OI); + if (!Op) + continue; + + OI->set(0); + if (!Op->use_empty()) + continue; + + Dead.push_back(Op); } I->eraseFromParent(); } @@ -540,7 +600,7 @@ void PromoteMem2Reg::run() { for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; - assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + //assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); @@ -560,7 +620,9 @@ void PromoteMem2Reg::run() { // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. - Info.AnalyzeAlloca(AI); + bool Good = Info.analyzeAlloca(*AI); + (void)Good; + assert(Good && "Cannot promote non-promotable alloca!"); // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. @@ -1087,8 +1149,16 @@ NextIteration: goto NextIteration; } -void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AliasSetTracker *AST) { +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // We cast away constness because we re-use the non-const analysis that the + // actual promotion routine uses. While it is non-const, it doesn't actually + // mutate anything at this phase, and we discard the non-const results that + // promotion uses to mutate the alloca. + return AllocaInfo().analyzeAlloca(*const_cast<AllocaInst *>(AI)); +} + +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, + DominatorTree &DT, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll deleted file mode 100644 index 5e4f9bf..0000000 --- a/test/Transforms/Mem2Reg/ignore-lifetime.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt -mem2reg -S -o - < %s | FileCheck %s - -declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) -declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr) - -define void @test1() { -; CHECK: test1 -; CHECK-NOT: alloca - %A = alloca i32 - %B = bitcast i32* %A to i8* - call void @llvm.lifetime.start(i64 2, i8* %B) - store i32 1, i32* %A - call void @llvm.lifetime.end(i64 2, i8* %B) - ret void -} - -define void @test2() { -; CHECK: test2 -; CHECK-NOT: alloca - %A = alloca {i8, i16} - %B = getelementptr {i8, i16}* %A, i32 0, i32 0 - call void @llvm.lifetime.start(i64 2, i8* %B) - store {i8, i16} zeroinitializer, {i8, i16}* %A - call void @llvm.lifetime.end(i64 2, i8* %B) - ret void -} diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll new file mode 100644 index 0000000..b08b1f1 --- /dev/null +++ b/test/Transforms/Mem2Reg/use-analysis.ll @@ -0,0 +1,70 @@ +; RUN: opt -mem2reg -S -o - < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) +declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr) + +define void @test1() { +; Ensure we can look through a bitcast to i8* and the addition of lifetime +; markers. +; +; CHECK-LABEL: @test1( +; CHECK-NOT: alloca +; CHECK: ret void + + %A = alloca i32 + %B = bitcast i32* %A to i8* + call void @llvm.lifetime.start(i64 2, i8* %B) + store i32 1, i32* %A + call void @llvm.lifetime.end(i64 2, i8* %B) + ret void +} + +define void @test2() { +; Ensure we can look through a GEP to i8* and the addition of lifetime +; markers. +; +; CHECK-LABEL: @test2( +; CHECK-NOT: alloca +; CHECK: ret void + + %A = alloca {i8, i16} + %B = getelementptr {i8, i16}* %A, i32 0, i32 0 + call void @llvm.lifetime.start(i64 2, i8* %B) + store {i8, i16} zeroinitializer, {i8, i16}* %A + call void @llvm.lifetime.end(i64 2, i8* %B) + ret void +} + +define i32 @test3(i32 %x) { +; CHECK-LABEL: @test3( +; +; Check that we recursively walk the uses of the alloca and thus can see +; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime +; markers. +entry: + %a = alloca i32 +; CHECK-NOT: alloca + + %b = bitcast i32* %a to i8* + %b2 = getelementptr inbounds i8* %b, i32 0 + %b3 = getelementptr inbounds i8* %b2, i32 0 + call void @llvm.lifetime.start(i64 -1, i8* %b3) +; CHECK-NOT: call void @llvm.lifetime.start + + store i32 %x, i32* %a +; CHECK-NOT: store + + %dead = bitcast i32* %a to i4096* + %dead1 = bitcast i4096* %dead to i42* + %dead2 = getelementptr inbounds i32* %a, i32 %x +; CHECK-NOT: bitcast +; CHECK-NOT: getelementptr + + %ret = load i32* %a +; CHECK-NOT: load + + ret i32 %ret +; CHECK: ret i32 %x +} |