diff options
author | Gordon Henriksen <gordonhenriksen@mac.com> | 2008-01-07 01:30:53 +0000 |
---|---|---|
committer | Gordon Henriksen <gordonhenriksen@mac.com> | 2008-01-07 01:30:53 +0000 |
commit | cc71a50ac7ba72f6405150297763be9c202c1d2b (patch) | |
tree | 16961ddedb70b284c1bab1039cd8350248bc5d3f /lib/Transforms | |
parent | df87fdce8ab5371dd29aca057cbca22924840166 (diff) | |
download | external_llvm-cc71a50ac7ba72f6405150297763be9c202c1d2b.zip external_llvm-cc71a50ac7ba72f6405150297763be9c202c1d2b.tar.gz external_llvm-cc71a50ac7ba72f6405150297763be9c202c1d2b.tar.bz2 |
With this patch, the LowerGC transformation becomes the
ShadowStackCollector, which additionally has reduced overhead with
no sacrifice in portability.
Considering a function @fun with 8 loop-local roots,
ShadowStackCollector introduces the following overhead
(x86):
; shadowstack prologue
movl L_llvm_gc_root_chain$non_lazy_ptr, %eax
movl (%eax), %ecx
movl $___gc_fun, 20(%esp)
movl $0, 24(%esp)
movl $0, 28(%esp)
movl $0, 32(%esp)
movl $0, 36(%esp)
movl $0, 40(%esp)
movl $0, 44(%esp)
movl $0, 48(%esp)
movl $0, 52(%esp)
movl %ecx, 16(%esp)
leal 16(%esp), %ecx
movl %ecx, (%eax)
; shadowstack loop overhead
(none)
; shadowstack epilogue
movl 48(%esp), %edx
movl %edx, (%ecx)
; shadowstack metadata
.align 3
___gc_fun: # __gc_fun
.long 8
.space 4
In comparison to LowerGC:
; lowergc prologue
movl L_llvm_gc_root_chain$non_lazy_ptr, %eax
movl (%eax), %ecx
movl %ecx, 48(%esp)
movl $8, 52(%esp)
movl $0, 60(%esp)
movl $0, 56(%esp)
movl $0, 68(%esp)
movl $0, 64(%esp)
movl $0, 76(%esp)
movl $0, 72(%esp)
movl $0, 84(%esp)
movl $0, 80(%esp)
movl $0, 92(%esp)
movl $0, 88(%esp)
movl $0, 100(%esp)
movl $0, 96(%esp)
movl $0, 108(%esp)
movl $0, 104(%esp)
movl $0, 116(%esp)
movl $0, 112(%esp)
; lowergc loop overhead
leal 44(%esp), %eax
movl %eax, 56(%esp)
leal 40(%esp), %eax
movl %eax, 64(%esp)
leal 36(%esp), %eax
movl %eax, 72(%esp)
leal 32(%esp), %eax
movl %eax, 80(%esp)
leal 28(%esp), %eax
movl %eax, 88(%esp)
leal 24(%esp), %eax
movl %eax, 96(%esp)
leal 20(%esp), %eax
movl %eax, 104(%esp)
leal 16(%esp), %eax
movl %eax, 112(%esp)
; lowergc epilogue
movl 48(%esp), %edx
movl %edx, (%ecx)
; lowergc metadata
(none)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45670 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Scalar/LowerGC.cpp | 350 |
1 files changed, 0 insertions, 350 deletions
diff --git a/lib/Transforms/Scalar/LowerGC.cpp b/lib/Transforms/Scalar/LowerGC.cpp index 8974986..e69de29 100644 --- a/lib/Transforms/Scalar/LowerGC.cpp +++ b/lib/Transforms/Scalar/LowerGC.cpp @@ -1,350 +0,0 @@ -//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements lowering for the llvm.gc* intrinsics for targets that do -// not natively support them (which includes the C backend). Note that the code -// generated is not as efficient as it would be for targets that natively -// support the GC intrinsics, but it is useful for getting new targets -// up-and-running quickly. -// -// This pass implements the code transformation described in this paper: -// "Accurate Garbage Collection in an Uncooperative Environment" -// Fergus Henderson, ISMM, 2002 -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lowergc" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -namespace { - class VISIBILITY_HIDDEN LowerGC : public FunctionPass { - /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the - /// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics. - Function *GCRootInt, *GCReadInt, *GCWriteInt; - - /// GCRead/GCWrite - These are the functions provided by the garbage - /// collector for read/write barriers. - Constant *GCRead, *GCWrite; - - /// RootChain - This is the global linked-list that contains the chain of GC - /// roots. - GlobalVariable *RootChain; - - /// MainRootRecordType - This is the type for a function root entry if it - /// had zero roots. - const Type *MainRootRecordType; - public: - static char ID; // Pass identification, replacement for typeid - LowerGC() : FunctionPass((intptr_t)&ID), - GCRootInt(0), GCReadInt(0), GCWriteInt(0), - GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {} - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - private: - const StructType *getRootRecordType(unsigned NumRoots); - }; - - char LowerGC::ID = 0; - RegisterPass<LowerGC> - X("lowergc", "Lower GC intrinsics, for GCless code generators"); -} - -/// createLowerGCPass - This function returns an instance of the "lowergc" -/// pass, which lowers garbage collection intrinsics to normal LLVM code. -FunctionPass *llvm::createLowerGCPass() { - return new LowerGC(); -} - -/// getRootRecordType - This function creates and returns the type for a root -/// record containing 'NumRoots' roots. -const StructType *LowerGC::getRootRecordType(unsigned NumRoots) { - // Build a struct that is a type used for meta-data/root pairs. - std::vector<const Type *> ST; - ST.push_back(GCRootInt->getFunctionType()->getParamType(0)); - ST.push_back(GCRootInt->getFunctionType()->getParamType(1)); - StructType *PairTy = StructType::get(ST); - - // Build the array of pairs. - ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots); - - // Now build the recursive list type. - PATypeHolder RootListH = - MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get(); - ST.clear(); - ST.push_back(PointerType::getUnqual(RootListH)); // Prev pointer - ST.push_back(Type::Int32Ty); // NumElements in array - ST.push_back(PairArrTy); // The pairs - StructType *RootList = StructType::get(ST); - if (MainRootRecordType) - return RootList; - - assert(NumRoots == 0 && "The main struct type should have zero entries!"); - cast<OpaqueType>((Type*)RootListH.get())->refineAbstractTypeTo(RootList); - MainRootRecordType = RootListH; - return cast<StructType>(RootListH.get()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. If -/// not, this pass does not do anything. -bool LowerGC::doInitialization(Module &M) { - GCRootInt = M.getFunction("llvm.gcroot"); - GCReadInt = M.getFunction("llvm.gcread"); - GCWriteInt = M.getFunction("llvm.gcwrite"); - if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - - PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty); - PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - - // If the program is using read/write barriers, find the implementations of - // them from the GC runtime library. - if (GCReadInt) // Make: sbyte* %llvm_gc_read(sbyte**) - GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, VoidPtrPtr, - (Type *)0); - if (GCWriteInt) // Make: void %llvm_gc_write(sbyte*, sbyte**) - GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy, - VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0); - - // If the program has GC roots, get or create the global root list. - if (GCRootInt) { - const StructType *RootListTy = getRootRecordType(0); - const Type *PRLTy = PointerType::getUnqual(RootListTy); - M.addTypeName("llvm_gc_root_ty", RootListTy); - - // Get the root chain if it already exists. - RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy); - if (RootChain == 0) { - // If the root chain does not exist, insert a new one with linkonce - // linkage! - RootChain = new GlobalVariable(PRLTy, false, - GlobalValue::LinkOnceLinkage, - Constant::getNullValue(PRLTy), - "llvm_gc_root_chain", &M); - } else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) { - RootChain->setInitializer(Constant::getNullValue(PRLTy)); - RootChain->setLinkage(GlobalValue::LinkOnceLinkage); - } - } - return true; -} - -/// Coerce - If the specified operand number of the specified instruction does -/// not have the specified type, insert a cast. Note that this only uses BitCast -/// because the types involved are all pointers. -static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) { - if (I->getOperand(OpNum)->getType() != Ty) { - if (Constant *C = dyn_cast<Constant>(I->getOperand(OpNum))) - I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty)); - else { - CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I); - I->setOperand(OpNum, CI); - } - } -} - -/// runOnFunction - If the program is using GC intrinsics, replace any -/// read/write intrinsics with the appropriate read/write barrier calls, then -/// inline them. Finally, build the data structures for -bool LowerGC::runOnFunction(Function &F) { - // Quick exit for programs that are not using GC mechanisms. - if (!GCRootInt && !GCReadInt && !GCWriteInt) return false; - - PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty); - PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr); - - // If there are read/write barriers in the program, perform a quick pass over - // the function eliminating them. While we are at it, remember where we see - // calls to llvm.gcroot. - std::vector<CallInst*> GCRoots; - std::vector<CallInst*> NormalCalls; - - bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (CallInst *CI = dyn_cast<CallInst>(II++)) { - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->isIntrinsic()) - NormalCalls.push_back(CI); // Remember all normal function calls. - - if (Function *F = CI->getCalledFunction()) - if (F == GCRootInt) - GCRoots.push_back(CI); - else if (F == GCReadInt || F == GCWriteInt) { - if (F == GCWriteInt) { - // Change a llvm.gcwrite call to call llvm_gc_write instead. - CI->setOperand(0, GCWrite); - // Insert casts of the operands as needed. - Coerce(CI, 1, VoidPtr); - Coerce(CI, 2, VoidPtr); - Coerce(CI, 3, VoidPtrPtr); - } else { - Coerce(CI, 1, VoidPtr); - Coerce(CI, 2, VoidPtrPtr); - if (CI->getType() == VoidPtr) { - CI->setOperand(0, GCRead); - } else { - // Create a whole new call to replace the old one. - - // It sure would be nice to pass op_begin()+1, - // op_begin()+2 but it runs into trouble with - // CallInst::init's &*iterator, which requires a - // conversion from Use* to Value*. The conversion - // from Use to Value * is not useful because the - // memory for Value * won't be contiguous. - Value* Args[] = { - CI->getOperand(1), - CI->getOperand(2) - }; - CallInst *NC = new CallInst(GCRead, Args, Args + 2, - CI->getName(), CI); - // These functions only deal with ptr type results so BitCast - // is the correct kind of cast (no-op cast). - Value *NV = new BitCastInst(NC, CI->getType(), "", CI); - CI->replaceAllUsesWith(NV); - BB->getInstList().erase(CI); - CI = NC; - } - } - - MadeChange = true; - } - } - - // If there are no GC roots in this function, then there is no need to create - // a GC list record for it. - if (GCRoots.empty()) return MadeChange; - - // Okay, there are GC roots in this function. On entry to the function, add a - // record to the llvm_gc_root_chain, and remove it on exit. - - // Create the alloca, and zero it out. - const StructType *RootListTy = getRootRecordType(GCRoots.size()); - AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", F.begin()->begin()); - - // Insert the memset call after all of the allocas in the function. - BasicBlock::iterator IP = AI; - while (isa<AllocaInst>(IP)) ++IP; - - Constant *Zero = ConstantInt::get(Type::Int32Ty, 0); - Constant *One = ConstantInt::get(Type::Int32Ty, 1); - - Value *Idx[2] = { Zero, Zero }; - - // Get a pointer to the prev pointer. - Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2, - "prevptrptr", IP); - - // Load the previous pointer. - Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP); - // Store the previous pointer into the prevptrptr - new StoreInst(PrevPtr, PrevPtrPtr, IP); - - // Set the number of elements in this record. - Idx[1] = One; - Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2, - "numeltsptr", IP); - new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), NumEltsPtr,IP); - - Value* Par[4]; - Par[0] = Zero; - Par[1] = ConstantInt::get(Type::Int32Ty, 2); - - const PointerType *PtrLocTy = - cast<PointerType>(GCRootInt->getFunctionType()->getParamType(0)); - Constant *Null = ConstantPointerNull::get(PtrLocTy); - - // Initialize all of the gcroot records now. - for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) { - // Initialize the meta-data pointer. - Par[2] = ConstantInt::get(Type::Int32Ty, i); - Par[3] = One; - Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4, - "MetaDataPtr", IP); - assert(isa<Constant>(GCRoots[i]->getOperand(2)) && "Must be a constant"); - new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP); - - // Initialize the root pointer to null on entry to the function. - Par[3] = Zero; - Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4, - "RootEntPtr", IP); - new StoreInst(Null, RootPtrPtr, IP); - - // Each occurrance of the llvm.gcroot intrinsic now turns into an - // initialization of the slot with the address. - new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]); - } - - // Now that the record is all initialized, store the pointer into the global - // pointer. - Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType), "", IP); - new StoreInst(C, RootChain, IP); - - // Eliminate all the gcroot records now. - for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) - GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]); - - // On exit from the function we have to remove the entry from the GC root - // chain. Doing this is straight-forward for return and unwind instructions: - // just insert the appropriate copy. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (isa<UnwindInst>(BB->getTerminator()) || - isa<ReturnInst>(BB->getTerminator())) { - // We could reuse the PrevPtr loaded on entry to the function, but this - // would make the value live for the whole function, which is probably a - // bad idea. Just reload the value out of our stack entry. - PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator()); - new StoreInst(PrevPtr, RootChain, BB->getTerminator()); - } - - // If an exception is thrown from a callee we have to make sure to - // unconditionally take the record off the stack. For this reason, we turn - // all call instructions into invoke whose cleanup pops the entry off the - // stack. We only insert one cleanup block, which is shared by all invokes. - if (!NormalCalls.empty()) { - // Create the shared cleanup block. - BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F); - UnwindInst *UI = new UnwindInst(Cleanup); - PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI); - new StoreInst(PrevPtr, RootChain, UI); - - // Loop over all of the function calls, turning them into invokes. - while (!NormalCalls.empty()) { - CallInst *CI = NormalCalls.back(); - BasicBlock *CBB = CI->getParent(); - NormalCalls.pop_back(); - - // Split the basic block containing the function call. - BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont"); - - // Remove the unconditional branch inserted at the end of the CBB. - CBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - std::vector<Value*> Args(CI->op_begin()+1, CI->op_end()); - - Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup, - Args.begin(), Args.end(), CI->getName(), CBB); - cast<InvokeInst>(II)->setCallingConv(CI->getCallingConv()); - cast<InvokeInst>(II)->setParamAttrs(CI->getParamAttrs()); - CI->replaceAllUsesWith(II); - delete CI; - } - } - - return true; -} |