diff options
author | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
---|---|---|
committer | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
commit | f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch) | |
tree | ebb79ea1ee5e3bc1fdf38541a811a8b804f0679a /lib/Transforms/IPO | |
download | external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.zip external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.gz external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.bz2 |
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/IPO')
20 files changed, 8328 insertions, 0 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp new file mode 100644 index 0000000..9a7bcc7 --- /dev/null +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -0,0 +1,559 @@ +//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass promotes "by reference" arguments to be "by value" arguments. In +// practice, this means looking for internal functions that have pointer +// arguments. If we can prove, through the use of alias analysis, that an +// argument is *only* loaded, then we can pass the value into the function +// instead of the address of the value. This can cause recursive simplification +// of code and lead to the elimination of allocas (especially in C++ template +// code like the STL). +// +// This pass also handles aggregate arguments that are passed into a function, +// scalarizing them if the elements of the aggregate are only loaded. Note that +// we refuse to scalarize aggregates which would require passing in more than +// three operands to the function, because we don't want to pass thousands of +// operands for a large array or structure! +// +// Note that this transformation could also be done for arguments that are only +// stored to (returning the value instead), but we do not currently handle that +// case. This case would be best handled when and if we start supporting +// multiple return values from functions. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "argpromotion" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Compiler.h" +#include <set> +using namespace llvm; + +STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); +STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); +STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated"); + +namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
+ /// + struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetData>(); + CallGraphSCCPass::getAnalysisUsage(AU); + } + + virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + static char ID; // Pass identification, replacement for typeid + ArgPromotion() : CallGraphSCCPass((intptr_t)&ID) {} + + private: + bool PromoteArguments(CallGraphNode *CGN); + bool isSafeToPromoteArgument(Argument *Arg) const; + Function *DoPromotion(Function *F, std::vector<Argument*> &ArgsToPromote); + }; + + char ArgPromotion::ID = 0; + RegisterPass<ArgPromotion> X("argpromotion", + "Promote 'by reference' arguments to scalars"); +} + +Pass *llvm::createArgumentPromotionPass() { + return new ArgPromotion(); +} + +bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { + bool Changed = false, LocalChange; + + do { // Iterate until we stop promoting from this SCC. + LocalChange = false; + // Attempt to promote arguments from all functions in this SCC. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + LocalChange |= PromoteArguments(SCC[i]); + Changed |= LocalChange; // Remember that we changed something. + } while (LocalChange); + + return Changed; +} + +/// PromoteArguments - This method checks the specified function to see if there +/// are any promotable arguments and if it is safe to promote the function (for +/// example, all callers are direct). If safe to promote some arguments, it +/// calls the DoPromotion method. +/// +bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { + Function *F = CGN->getFunction(); + + // Make sure that it is local to this module. + if (!F || !F->hasInternalLinkage()) return false; + + // First check: see if there are any pointer arguments! If not, quick exit. 
+ std::vector<Argument*> PointerArgs; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + if (isa<PointerType>(I->getType())) + PointerArgs.push_back(I); + if (PointerArgs.empty()) return false; + + // Second check: make sure that all callers are direct callers. We can't + // transform functions that have indirect callers. + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); + UI != E; ++UI) { + CallSite CS = CallSite::get(*UI); + if (!CS.getInstruction()) // "Taking the address" of the function + return false; + + // Ensure that this call site is CALLING the function, not passing it as + // an argument. + for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); + AI != E; ++AI) + if (*AI == F) return false; // Passing the function address in! + } + + // Check to see which arguments are promotable. If an argument is not + // promotable, remove it from the PointerArgs vector. + for (unsigned i = 0; i != PointerArgs.size(); ++i) + if (!isSafeToPromoteArgument(PointerArgs[i])) { + std::swap(PointerArgs[i--], PointerArgs.back()); + PointerArgs.pop_back(); + } + + // No promotable pointer arguments. + if (PointerArgs.empty()) return false; + + // Okay, promote all of the arguments are rewrite the callees! + Function *NewF = DoPromotion(F, PointerArgs); + + // Update the call graph to know that the old function is gone. + getAnalysis<CallGraph>().changeFunction(F, NewF); + return true; +} + +/// IsAlwaysValidPointer - Return true if the specified pointer is always legal +/// to load. 
+static bool IsAlwaysValidPointer(Value *V) { + if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true; + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) + return IsAlwaysValidPointer(GEP->getOperand(0)); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::GetElementPtr) + return IsAlwaysValidPointer(CE->getOperand(0)); + + return false; +} + +/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that +/// all callees pass in a valid pointer for the specified function argument. +static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { + Function *Callee = Arg->getParent(); + + unsigned ArgNo = std::distance(Callee->arg_begin(), + Function::arg_iterator(Arg)); + + // Look at all call sites of the function. At this pointer we know we only + // have direct callees. + for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end(); + UI != E; ++UI) { + CallSite CS = CallSite::get(*UI); + assert(CS.getInstruction() && "Should only have direct calls!"); + + if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) + return false; + } + return true; +} + + +/// isSafeToPromoteArgument - As you might guess from the name of this method, +/// it checks to see if it is both safe and useful to promote the argument. +/// This method limits promotion of aggregates to only promote up to three +/// elements of the aggregate in order to avoid exploding the number of +/// arguments passed in. +bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg) const { + // We can only promote this argument if all of the uses are loads, or are GEP + // instructions (with constant indices) that are subsequently loaded. 
+ bool HasLoadInEntryBlock = false; + BasicBlock *EntryBlock = Arg->getParent()->begin(); + std::vector<LoadInst*> Loads; + std::vector<std::vector<ConstantInt*> > GEPIndices; + for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); + UI != E; ++UI) + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LI->isVolatile()) return false; // Don't hack volatile loads + Loads.push_back(LI); + HasLoadInEntryBlock |= LI->getParent() == EntryBlock; + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (GEP->use_empty()) { + // Dead GEP's cause trouble later. Just remove them if we run into + // them. + getAnalysis<AliasAnalysis>().deleteValue(GEP); + GEP->getParent()->getInstList().erase(GEP); + return isSafeToPromoteArgument(Arg); + } + // Ensure that all of the indices are constants. + std::vector<ConstantInt*> Operands; + for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) + if (ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(i))) + Operands.push_back(C); + else + return false; // Not a constant operand GEP! + + // Ensure that the only users of the GEP are load instructions. + for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); + UI != E; ++UI) + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LI->isVolatile()) return false; // Don't hack volatile loads + Loads.push_back(LI); + HasLoadInEntryBlock |= LI->getParent() == EntryBlock; + } else { + return false; + } + + // See if there is already a GEP with these indices. If not, check to + // make sure that we aren't promoting too many elements. If so, nothing + // to do. + if (std::find(GEPIndices.begin(), GEPIndices.end(), Operands) == + GEPIndices.end()) { + if (GEPIndices.size() == 3) { + DOUT << "argpromotion disable promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than 3 arguments to the function.\n"; + // We limit aggregate promotion to only promoting up to three elements + // of the aggregate. 
+ return false; + } + GEPIndices.push_back(Operands); + } + } else { + return false; // Not a load or a GEP. + } + + if (Loads.empty()) return true; // No users, this is a dead argument. + + // If we decide that we want to promote this argument, the value is going to + // be unconditionally loaded in all callees. This is only safe to do if the + // pointer was going to be unconditionally loaded anyway (i.e. there is a load + // of the pointer in the entry block of the function) or if we can prove that + // all pointers passed in are always to legal locations (for example, no null + // pointers are passed in, no pointers to free'd memory, etc). + if (!HasLoadInEntryBlock && !AllCalleesPassInValidPointerForArgument(Arg)) + return false; // Cannot prove that this is safe!! + + // Okay, now we know that the argument is only used by load instructions and + // it is safe to unconditionally load the pointer. Use alias analysis to + // check to see if the pointer is guaranteed to not be modified from entry of + // the function to each of the load instructions. + + // Because there could be several/many load instructions, remember which + // blocks we know to be transparent to the load. + std::set<BasicBlock*> TranspBlocks; + + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + TargetData &TD = getAnalysis<TargetData>(); + + for (unsigned i = 0, e = Loads.size(); i != e; ++i) { + // Check to see if the load is invalidated from the start of the block to + // the load itself. + LoadInst *Load = Loads[i]; + BasicBlock *BB = Load->getParent(); + + const PointerType *LoadTy = + cast<PointerType>(Load->getOperand(0)->getType()); + unsigned LoadSize = (unsigned)TD.getTypeSize(LoadTy->getElementType()); + + if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) + return false; // Pointer is invalidated! + + // Now check every path from the entry block to the load for transparency. 
+ // To do this, we perform a depth first search on the inverse CFG from the + // loading block. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + for (idf_ext_iterator<BasicBlock*> I = idf_ext_begin(*PI, TranspBlocks), + E = idf_ext_end(*PI, TranspBlocks); I != E; ++I) + if (AA.canBasicBlockModify(**I, Arg, LoadSize)) + return false; + } + + // If the path from the entry of the function to each load is free of + // instructions that potentially invalidate the load, we can make the + // transformation! + return true; +} + +namespace { + /// GEPIdxComparator - Provide a strong ordering for GEP indices. All Value* + /// elements are instances of ConstantInt. + /// + struct GEPIdxComparator { + bool operator()(const std::vector<Value*> &LHS, + const std::vector<Value*> &RHS) const { + unsigned idx = 0; + for (; idx < LHS.size() && idx < RHS.size(); ++idx) { + if (LHS[idx] != RHS[idx]) { + return cast<ConstantInt>(LHS[idx])->getZExtValue() < + cast<ConstantInt>(RHS[idx])->getZExtValue(); + } + } + + // Return less than if we ran out of stuff in LHS and we didn't run out of + // stuff in RHS. + return idx == LHS.size() && idx != RHS.size(); + } + }; +} + + +/// DoPromotion - This method actually performs the promotion of the specified +/// arguments, and returns the new function. At this point, we know that it's +/// safe to do so. +Function *ArgPromotion::DoPromotion(Function *F, + std::vector<Argument*> &Args2Prom) { + std::set<Argument*> ArgsToPromote(Args2Prom.begin(), Args2Prom.end()); + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has modified arguments. 
+ const FunctionType *FTy = F->getFunctionType(); + std::vector<const Type*> Params; + + typedef std::set<std::vector<Value*>, GEPIdxComparator> ScalarizeTable; + + // ScalarizedElements - If we are promoting a pointer that has elements + // accessed out of it, keep track of which elements are accessed so that we + // can add one argument for each. + // + // Arguments that are directly loaded will have a zero element value here, to + // handle cases where there are both a direct load and GEP accesses. + // + std::map<Argument*, ScalarizeTable> ScalarizedElements; + + // OriginalLoads - Keep track of a representative load instruction from the + // original function so that we can tell the alias analysis implementation + // what the new GEP/Load instructions we are inserting look like. + std::map<std::vector<Value*>, LoadInst*> OriginalLoads; + + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + if (!ArgsToPromote.count(I)) { + Params.push_back(I->getType()); + } else if (I->use_empty()) { + ++NumArgumentsDead; + } else { + // Okay, this is being promoted. Check to see if there are any GEP uses + // of the argument. + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); + std::vector<Value*> Indices(User->op_begin()+1, User->op_end()); + ArgIndices.insert(Indices); + LoadInst *OrigLoad; + if (LoadInst *L = dyn_cast<LoadInst>(User)) + OrigLoad = L; + else + OrigLoad = cast<LoadInst>(User->use_back()); + OriginalLoads[Indices] = OrigLoad; + } + + // Add a parameter to the function for each element passed in. 
+ for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) + Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), + &(*SI)[0], + SI->size())); + + if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) + ++NumArgumentsPromoted; + else + ++NumAggregatesPromoted; + } + + const Type *RetTy = FTy->getReturnType(); + + // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which + // have zero fixed arguments. + bool ExtraArgHack = false; + if (Params.empty() && FTy->isVarArg()) { + ExtraArgHack = true; + Params.push_back(Type::Int32Ty); + } + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + + // Create the new function body and insert it into the module... + Function *NF = new Function(NFTy, F->getLinkage(), F->getName()); + NF->setCallingConv(F->getCallingConv()); + F->getParent()->getFunctionList().insert(F, NF); + + // Get the alias analysis information that we need to update to reflect our + // changes. + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in the loaded pointers. + // + std::vector<Value*> Args; + while (!F->use_empty()) { + CallSite CS = CallSite::get(F->use_back()); + Instruction *Call = CS.getInstruction(); + + // Loop over the operands, inserting GEP and loads in the caller as + // appropriate. + CallSite::arg_iterator AI = CS.arg_begin(); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI) + if (!ArgsToPromote.count(I)) + Args.push_back(*AI); // Unmodified argument + else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. 
+ ScalarizeTable &ArgIndices = ScalarizedElements[I]; + for (ScalarizeTable::iterator SI = ArgIndices.begin(), + E = ArgIndices.end(); SI != E; ++SI) { + Value *V = *AI; + LoadInst *OrigLoad = OriginalLoads[*SI]; + if (!SI->empty()) { + V = new GetElementPtrInst(V, &(*SI)[0], SI->size(), + V->getName()+".idx", Call); + AA.copyValue(OrigLoad->getOperand(0), V); + } + Args.push_back(new LoadInst(V, V->getName()+".val", Call)); + AA.copyValue(OrigLoad, Args.back()); + } + } + + if (ExtraArgHack) + Args.push_back(Constant::getNullValue(Type::Int32Ty)); + + // Push any varargs arguments on the list + for (; AI != CS.arg_end(); ++AI) + Args.push_back(*AI); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = new InvokeInst(NF, II->getNormalDest(), II->getUnwindDest(), + &Args[0], Args.size(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + } else { + New = new CallInst(NF, &Args[0], Args.size(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + Args.clear(); + + // Update the alias analysis implementation to know that we are replacing + // the old call with a new one. + AA.replaceWithNewValue(Call, New); + + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->getParent()->getInstList().erase(Call); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. 
+ // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) + if (!ArgsToPromote.count(I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + AA.replaceWithNewValue(I, I2); + ++I2; + } else if (I->use_empty()) { + AA.deleteValue(I); + } else { + // Otherwise, if we promoted this argument, then all users are load + // instructions, and all loads should be using the new argument that we + // added. + ScalarizeTable &ArgIndices = ScalarizedElements[I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { + assert(ArgIndices.begin()->empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(I2); + AA.replaceWithNewValue(LI, I2); + LI->getParent()->getInstList().erase(LI); + DOUT << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"; + } else { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); + std::vector<Value*> Operands(GEP->op_begin()+1, GEP->op_end()); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + *It != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (ConstantInt *CI = dyn_cast<ConstantInt>(Operands[i])) + NewName += "." + CI->getValue().toString(10); + else + NewName += ".x"; + TheArg->setName(NewName+".val"); + + DOUT << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << F->getName() << "'\n"; + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. 
+ while (!GEP->use_empty()) { + LoadInst *L = cast<LoadInst>(GEP->use_back()); + L->replaceAllUsesWith(TheArg); + AA.replaceWithNewValue(L, TheArg); + L->getParent()->getInstList().erase(L); + } + AA.deleteValue(GEP); + GEP->getParent()->getInstList().erase(GEP); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. + for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) + ++I2; + } + + // Notify the alias analysis implementation that we inserted a new argument. + if (ExtraArgHack) + AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin()); + + + // Tell the alias analysis that the old function is about to disappear. + AA.replaceWithNewValue(F, NF); + + // Now that the old function is dead, delete it. + F->getParent()->getFunctionList().erase(F); + return NF; +} diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp new file mode 100644 index 0000000..0c7ee59 --- /dev/null +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -0,0 +1,116 @@ +//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to a pass that merges duplicate global +// constants together into a single constant that is shared. This is useful +// because some passes (ie TraceValues) insert a lot of string constants into +// the program, regardless of whether or not an existing string is available. +// +// Algorithm: ConstantMerge is designed to build up a map of available constants +// and eliminate duplicates when it is initialized. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "constmerge" +#include "llvm/Transforms/IPO.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumMerged, "Number of global constants merged"); + +namespace { + struct VISIBILITY_HIDDEN ConstantMerge : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMerge() : ModulePass((intptr_t)&ID) {} + + // run - For this pass, process all of the globals in the module, + // eliminating duplicate constants. + // + bool runOnModule(Module &M); + }; + + char ConstantMerge::ID = 0; + RegisterPass<ConstantMerge>X("constmerge","Merge Duplicate Global Constants"); +} + +ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + +bool ConstantMerge::runOnModule(Module &M) { + // Map unique constant/section pairs to globals. We don't want to merge + // globals in different sections. + std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap; + + // Replacements - This vector contains a list of replacements to perform. + std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements; + + bool MadeChange = false; + + // Iterate constant merging while we are still making progress. Merging two + // constants together may allow us to merge other constants together if the + // second level constants have initializers which point to the globals that + // were just merged. + while (1) { + // First pass: identify all globals that can be merged together, filling in + // the Replacements vector. We cannot do the replacement in this pass + // because doing so may cause initializers of other globals to be rewritten, + // invalidating the Constant* pointers in CMap. 
+ // + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + + // If this GV is dead, remove it. + GV->removeDeadConstantUsers(); + if (GV->use_empty() && GV->hasInternalLinkage()) { + GV->eraseFromParent(); + continue; + } + + // Only process constants with initializers. + if (GV->isConstant() && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasInternalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); + } else if (GV->hasInternalLinkage()) { + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(Slot, GV)); + Slot = GV; + } + } + } + + if (Replacements.empty()) + return MadeChange; + CMap.clear(); + + // Now that we have figured out which replacements must be made, do them all + // now. This avoid invalidating the pointers in CMap, which are unneeded + // now. + for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { + // Eliminate any uses of the dead global... + Replacements[i].first->replaceAllUsesWith(Replacements[i].second); + + // Delete the global value from the module... 
+ M.getGlobalList().erase(Replacements[i].first); + } + + NumMerged += Replacements.size(); + Replacements.clear(); + } +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp new file mode 100644 index 0000000..943ea30 --- /dev/null +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -0,0 +1,703 @@ +//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass deletes dead arguments from internal functions. Dead argument +// elimination removes arguments which are directly dead, as well as arguments +// only passed into function calls as dead arguments of other functions. This +// pass also deletes dead arguments in a similar way. +// +// This pass is often useful as a cleanup pass to run after aggressive +// interprocedural passes, which add possibly-dead arguments. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "deadargelim" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constant.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include <set> +using namespace llvm; + +STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); +STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); + +namespace { + /// DAE - The dead argument elimination pass. 
+ /// + class VISIBILITY_HIDDEN DAE : public ModulePass { + /// Liveness enum - During our initial pass over the program, we determine + /// that things are either definately alive, definately dead, or in need of + /// interprocedural analysis (MaybeLive). + /// + enum Liveness { Live, MaybeLive, Dead }; + + /// LiveArguments, MaybeLiveArguments, DeadArguments - These sets contain + /// all of the arguments in the program. The Dead set contains arguments + /// which are completely dead (never used in the function). The MaybeLive + /// set contains arguments which are only passed into other function calls, + /// thus may be live and may be dead. The Live set contains arguments which + /// are known to be alive. + /// + std::set<Argument*> DeadArguments, MaybeLiveArguments, LiveArguments; + + /// DeadRetVal, MaybeLiveRetVal, LifeRetVal - These sets contain all of the + /// functions in the program. The Dead set contains functions whose return + /// value is known to be dead. The MaybeLive set contains functions whose + /// return values are only used by return instructions, and the Live set + /// contains functions whose return values are used, functions that are + /// external, and functions that already return void. + /// + std::set<Function*> DeadRetVal, MaybeLiveRetVal, LiveRetVal; + + /// InstructionsToInspect - As we mark arguments and return values + /// MaybeLive, we keep track of which instructions could make the values + /// live here. Once the entire program has had the return value and + /// arguments analyzed, this set is scanned to promote the MaybeLive objects + /// to be Live if they really are used. + std::vector<Instruction*> InstructionsToInspect; + + /// CallSites - Keep track of the call sites of functions that have + /// MaybeLive arguments or return values. 
+ std::multimap<Function*, CallSite> CallSites; + + public: + static char ID; // Pass identification, replacement for typeid + DAE() : ModulePass((intptr_t)&ID) {} + bool runOnModule(Module &M); + + virtual bool ShouldHackArguments() const { return false; } + + private: + Liveness getArgumentLiveness(const Argument &A); + bool isMaybeLiveArgumentNowLive(Argument *Arg); + + bool DeleteDeadVarargs(Function &Fn); + void SurveyFunction(Function &Fn); + + void MarkArgumentLive(Argument *Arg); + void MarkRetValLive(Function *F); + void MarkReturnInstArgumentLive(ReturnInst *RI); + + void RemoveDeadArgumentsFromFunction(Function *F); + }; + char DAE::ID = 0; + RegisterPass<DAE> X("deadargelim", "Dead Argument Elimination"); + + /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but + /// deletes arguments to functions which are external. This is only for use + /// by bugpoint. + struct DAH : public DAE { + static char ID; + virtual bool ShouldHackArguments() const { return true; } + }; + char DAH::ID = 0; + RegisterPass<DAH> Y("deadarghaX0r", + "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)"); +} + +/// createDeadArgEliminationPass - This pass removes arguments from functions +/// which are not used by the body of the function. +/// +ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } +ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } + +/// DeleteDeadVarargs - If this is an function that takes a ... list, and if +/// llvm.vastart is never called, the varargs list is dead for the function. +bool DAE::DeleteDeadVarargs(Function &Fn) { + assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); + if (Fn.isDeclaration() || !Fn.hasInternalLinkage()) return false; + + // Ensure that the function is only directly called. + for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) { + // If this use is anything other than a call site, give up. 
+ CallSite CS = CallSite::get(*I); + Instruction *TheCall = CS.getInstruction(); + if (!TheCall) return false; // Not a direct call site? + + // The addr of this function is passed to the call. + if (I.getOperandNo() != 0) return false; + } + + // Okay, we know we can transform this function if safe. Scan its body + // looking for calls to llvm.vastart. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + return false; + } + } + } + + // If we get here, there are no calls to llvm.vastart in the function body, + // remove the "..." and adjust all the calls. + + // Start by computing a new prototype for the function, which is the same as + // the old function, but has fewer arguments. + const FunctionType *FTy = Fn.getFunctionType(); + std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + unsigned NumArgs = Params.size(); + + // Create the new function body and insert it into the module... + Function *NF = new Function(NFTy, Fn.getLinkage()); + NF->setCallingConv(Fn.getCallingConv()); + Fn.getParent()->getFunctionList().insert(&Fn, NF); + NF->takeName(&Fn); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector<Value*> Args; + while (!Fn.use_empty()) { + CallSite CS = CallSite::get(Fn.use_back()); + Instruction *Call = CS.getInstruction(); + + // Loop over the operands, dropping extraneous ones at the end of the list. 
+ Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = new InvokeInst(NF, II->getNormalDest(), II->getUnwindDest(), + &Args[0], Args.size(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + } else { + New = new CallInst(NF, &Args[0], Args.size(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + Args.clear(); + + if (!Call->use_empty()) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + + New->takeName(Call); + + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->getParent()->getInstList().erase(Call); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. While we're at + // it, remove the dead arguments from the DeadArguments list. + // + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), + I2 = NF->arg_begin(); I != E; ++I, ++I2) { + // Move the name and users over to the new version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + } + + // Finally, nuke the old function. 
+ Fn.eraseFromParent(); + return true; +} + + +static inline bool CallPassesValueThoughVararg(Instruction *Call, + const Value *Arg) { + CallSite CS = CallSite::get(Call); + const Type *CalledValueTy = CS.getCalledValue()->getType(); + const Type *FTy = cast<PointerType>(CalledValueTy)->getElementType(); + unsigned NumFixedArgs = cast<FunctionType>(FTy)->getNumParams(); + for (CallSite::arg_iterator AI = CS.arg_begin()+NumFixedArgs; + AI != CS.arg_end(); ++AI) + if (AI->get() == Arg) + return true; + return false; +} + +// getArgumentLiveness - Inspect an argument, determining if is known Live +// (used in a computation), MaybeLive (only passed as an argument to a call), or +// Dead (not used). +DAE::Liveness DAE::getArgumentLiveness(const Argument &A) { + const FunctionType *FTy = A.getParent()->getFunctionType(); + + // If this is the return value of a struct function, it's not really dead. + if (FTy->isStructReturn() && &*A.getParent()->arg_begin() == &A) + return Live; + + if (A.use_empty()) // First check, directly dead? + return Dead; + + // Scan through all of the uses, looking for non-argument passing uses. + for (Value::use_const_iterator I = A.use_begin(), E = A.use_end(); I!=E;++I) { + // Return instructions do not immediately effect liveness. + if (isa<ReturnInst>(*I)) + continue; + + CallSite CS = CallSite::get(const_cast<User*>(*I)); + if (!CS.getInstruction()) { + // If its used by something that is not a call or invoke, it's alive! + return Live; + } + // If it's an indirect call, mark it alive... + Function *Callee = CS.getCalledFunction(); + if (!Callee) return Live; + + // Check to see if it's passed through a va_arg area: if so, we cannot + // remove it. 
+ if (CallPassesValueThoughVararg(CS.getInstruction(), &A)) + return Live; // If passed through va_arg area, we cannot remove it + } + + return MaybeLive; // It must be used, but only as argument to a function +} + + +// SurveyFunction - This performs the initial survey of the specified function, +// checking out whether or not it uses any of its incoming arguments or whether +// any callers use the return value. This fills in the +// (Dead|MaybeLive|Live)(Arguments|RetVal) sets. +// +// We consider arguments of non-internal functions to be intrinsically alive as +// well as arguments to functions which have their "address taken". +// +void DAE::SurveyFunction(Function &F) { + bool FunctionIntrinsicallyLive = false; + Liveness RetValLiveness = F.getReturnType() == Type::VoidTy ? Live : Dead; + + if (!F.hasInternalLinkage() && + (!ShouldHackArguments() || F.getIntrinsicID())) + FunctionIntrinsicallyLive = true; + else + for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) { + // If this use is anything other than a call site, the function is alive. + CallSite CS = CallSite::get(*I); + Instruction *TheCall = CS.getInstruction(); + if (!TheCall) { // Not a direct call site? + FunctionIntrinsicallyLive = true; + break; + } + + // Check to see if the return value is used... 
+ if (RetValLiveness != Live) + for (Value::use_iterator I = TheCall->use_begin(), + E = TheCall->use_end(); I != E; ++I) + if (isa<ReturnInst>(cast<Instruction>(*I))) { + RetValLiveness = MaybeLive; + } else if (isa<CallInst>(cast<Instruction>(*I)) || + isa<InvokeInst>(cast<Instruction>(*I))) { + if (CallPassesValueThoughVararg(cast<Instruction>(*I), TheCall) || + !CallSite::get(cast<Instruction>(*I)).getCalledFunction()) { + RetValLiveness = Live; + break; + } else { + RetValLiveness = MaybeLive; + } + } else { + RetValLiveness = Live; + break; + } + + // If the function is PASSED IN as an argument, its address has been taken + for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); + AI != E; ++AI) + if (AI->get() == &F) { + FunctionIntrinsicallyLive = true; + break; + } + if (FunctionIntrinsicallyLive) break; + } + + if (FunctionIntrinsicallyLive) { + DOUT << " Intrinsically live fn: " << F.getName() << "\n"; + for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); + AI != E; ++AI) + LiveArguments.insert(AI); + LiveRetVal.insert(&F); + return; + } + + switch (RetValLiveness) { + case Live: LiveRetVal.insert(&F); break; + case MaybeLive: MaybeLiveRetVal.insert(&F); break; + case Dead: DeadRetVal.insert(&F); break; + } + + DOUT << " Inspecting args for fn: " << F.getName() << "\n"; + + // If it is not intrinsically alive, we know that all users of the + // function are call sites. Mark all of the arguments live which are + // directly used, and keep track of all of the call sites of this function + // if there are any arguments we assume that are dead. 
+ // + bool AnyMaybeLiveArgs = false; + for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); + AI != E; ++AI) + switch (getArgumentLiveness(*AI)) { + case Live: + DOUT << " Arg live by use: " << AI->getName() << "\n"; + LiveArguments.insert(AI); + break; + case Dead: + DOUT << " Arg definitely dead: " << AI->getName() <<"\n"; + DeadArguments.insert(AI); + break; + case MaybeLive: + DOUT << " Arg only passed to calls: " << AI->getName() << "\n"; + AnyMaybeLiveArgs = true; + MaybeLiveArguments.insert(AI); + break; + } + + // If there are any "MaybeLive" arguments, we need to check callees of + // this function when/if they become alive. Record which functions are + // callees... + if (AnyMaybeLiveArgs || RetValLiveness == MaybeLive) + for (Value::use_iterator I = F.use_begin(), E = F.use_end(); + I != E; ++I) { + if (AnyMaybeLiveArgs) + CallSites.insert(std::make_pair(&F, CallSite::get(*I))); + + if (RetValLiveness == MaybeLive) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + InstructionsToInspect.push_back(cast<Instruction>(*UI)); + } +} + +// isMaybeLiveArgumentNowLive - Check to see if Arg is alive. At this point, we +// know that the only uses of Arg are to be passed in as an argument to a +// function call or return. Check to see if the formal argument passed in is in +// the LiveArguments set. If so, return true. +// +bool DAE::isMaybeLiveArgumentNowLive(Argument *Arg) { + for (Value::use_iterator I = Arg->use_begin(), E = Arg->use_end(); I!=E; ++I){ + if (isa<ReturnInst>(*I)) { + if (LiveRetVal.count(Arg->getParent())) return true; + continue; + } + + CallSite CS = CallSite::get(*I); + + // We know that this can only be used for direct calls... + Function *Callee = CS.getCalledFunction(); + + // Loop over all of the arguments (because Arg may be passed into the call + // multiple times) and check to see if any are now alive... 
+ CallSite::arg_iterator CSAI = CS.arg_begin(); + for (Function::arg_iterator AI = Callee->arg_begin(), E = Callee->arg_end(); + AI != E; ++AI, ++CSAI) + // If this is the argument we are looking for, check to see if it's alive + if (*CSAI == Arg && LiveArguments.count(AI)) + return true; + } + return false; +} + +/// MarkArgumentLive - The MaybeLive argument 'Arg' is now known to be alive. +/// Mark it live in the specified sets and recursively mark arguments in callers +/// live that are needed to pass in a value. +/// +void DAE::MarkArgumentLive(Argument *Arg) { + std::set<Argument*>::iterator It = MaybeLiveArguments.lower_bound(Arg); + if (It == MaybeLiveArguments.end() || *It != Arg) return; + + DOUT << " MaybeLive argument now live: " << Arg->getName() <<"\n"; + MaybeLiveArguments.erase(It); + LiveArguments.insert(Arg); + + // Loop over all of the call sites of the function, making any arguments + // passed in to provide a value for this argument live as necessary. + // + Function *Fn = Arg->getParent(); + unsigned ArgNo = std::distance(Fn->arg_begin(), Function::arg_iterator(Arg)); + + std::multimap<Function*, CallSite>::iterator I = CallSites.lower_bound(Fn); + for (; I != CallSites.end() && I->first == Fn; ++I) { + CallSite CS = I->second; + Value *ArgVal = *(CS.arg_begin()+ArgNo); + if (Argument *ActualArg = dyn_cast<Argument>(ArgVal)) { + MarkArgumentLive(ActualArg); + } else { + // If the value passed in at this call site is a return value computed by + // some other call site, make sure to mark the return value at the other + // call site as being needed. + CallSite ArgCS = CallSite::get(ArgVal); + if (ArgCS.getInstruction()) + if (Function *Fn = ArgCS.getCalledFunction()) + MarkRetValLive(Fn); + } + } +} + +/// MarkArgumentLive - The MaybeLive return value for the specified function is +/// now known to be alive. Propagate this fact to the return instructions which +/// produce it. 
+void DAE::MarkRetValLive(Function *F) { + assert(F && "Shame shame, we can't have null pointers here!"); + + // Check to see if we already knew it was live + std::set<Function*>::iterator I = MaybeLiveRetVal.lower_bound(F); + if (I == MaybeLiveRetVal.end() || *I != F) return; // It's already alive! + + DOUT << " MaybeLive retval now live: " << F->getName() << "\n"; + + MaybeLiveRetVal.erase(I); + LiveRetVal.insert(F); // It is now known to be live! + + // Loop over all of the functions, noticing that the return value is now live. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + MarkReturnInstArgumentLive(RI); +} + +void DAE::MarkReturnInstArgumentLive(ReturnInst *RI) { + Value *Op = RI->getOperand(0); + if (Argument *A = dyn_cast<Argument>(Op)) { + MarkArgumentLive(A); + } else if (CallInst *CI = dyn_cast<CallInst>(Op)) { + if (Function *F = CI->getCalledFunction()) + MarkRetValLive(F); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) { + if (Function *F = II->getCalledFunction()) + MarkRetValLive(F); + } +} + +// RemoveDeadArgumentsFromFunction - We know that F has dead arguments, as +// specified by the DeadArguments list. Transform the function and all of the +// callees of the function to not have these arguments. +// +void DAE::RemoveDeadArgumentsFromFunction(Function *F) { + // Start by computing a new prototype for the function, which is the same as + // the old function, but has fewer arguments. + const FunctionType *FTy = F->getFunctionType(); + std::vector<const Type*> Params; + + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + if (!DeadArguments.count(I)) + Params.push_back(I->getType()); + + const Type *RetTy = FTy->getReturnType(); + if (DeadRetVal.count(F)) { + RetTy = Type::VoidTy; + DeadRetVal.erase(F); + } + + // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which + // have zero fixed arguments. 
+ // + bool ExtraArgHack = false; + if (Params.empty() && FTy->isVarArg()) { + ExtraArgHack = true; + Params.push_back(Type::Int32Ty); + } + + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + + // Create the new function body and insert it into the module... + Function *NF = new Function(NFTy, F->getLinkage()); + NF->setCallingConv(F->getCallingConv()); + F->getParent()->getFunctionList().insert(F, NF); + NF->takeName(F); + + // Loop over all of the callers of the function, transforming the call sites + // to pass in a smaller number of arguments into the new function. + // + std::vector<Value*> Args; + while (!F->use_empty()) { + CallSite CS = CallSite::get(F->use_back()); + Instruction *Call = CS.getInstruction(); + + // Loop over the operands, deleting dead ones... + CallSite::arg_iterator AI = CS.arg_begin(); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI) + if (!DeadArguments.count(I)) // Remove operands for dead arguments + Args.push_back(*AI); + + if (ExtraArgHack) + Args.push_back(UndefValue::get(Type::Int32Ty)); + + // Push any varargs arguments on the list + for (; AI != CS.arg_end(); ++AI) + Args.push_back(*AI); + + Instruction *New; + if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + New = new InvokeInst(NF, II->getNormalDest(), II->getUnwindDest(), + &Args[0], Args.size(), "", Call); + cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); + } else { + New = new CallInst(NF, &Args[0], Args.size(), "", Call); + cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); + if (cast<CallInst>(Call)->isTailCall()) + cast<CallInst>(New)->setTailCall(); + } + Args.clear(); + + if (!Call->use_empty()) { + if (New->getType() == Type::VoidTy) + Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); + else { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } + } + + // Finally, remove the old call from the program, reducing the use-count of + // F. 
+ Call->getParent()->getInstList().erase(Call); + } + + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transfering uses of the old arguments over to + // the new arguments, also transfering over the names as well. While we're at + // it, remove the dead arguments from the DeadArguments list. + // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); + I != E; ++I) + if (!DeadArguments.count(I)) { + // If this is a live argument, move the name and users over to the new + // version. + I->replaceAllUsesWith(I2); + I2->takeName(I); + ++I2; + } else { + // If this argument is dead, replace any uses of it with null constants + // (these are guaranteed to only be operands to call instructions which + // will later be simplified). + I->replaceAllUsesWith(Constant::getNullValue(I->getType())); + DeadArguments.erase(I); + } + + // If we change the return value of the function we must rewrite any return + // instructions. Check this now. + if (F->getReturnType() != NF->getReturnType()) + for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + new ReturnInst(0, RI); + BB->getInstList().erase(RI); + } + + // Now that the old function is dead, delete it. + F->getParent()->getFunctionList().erase(F); +} + +bool DAE::runOnModule(Module &M) { + // First phase: loop through the module, determining which arguments are live. + // We assume all arguments are dead unless proven otherwise (allowing us to + // determine that dead arguments passed into recursive functions are dead). 
+ // + DOUT << "DAE - Determining liveness\n"; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function &F = *I++; + if (F.getFunctionType()->isVarArg()) + if (DeleteDeadVarargs(F)) + continue; + + SurveyFunction(F); + } + + // Loop over the instructions to inspect, propagating liveness among arguments + // and return values which are MaybeLive. + + while (!InstructionsToInspect.empty()) { + Instruction *I = InstructionsToInspect.back(); + InstructionsToInspect.pop_back(); + + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) { + // For return instructions, we just have to check to see if the return + // value for the current function is known now to be alive. If so, any + // arguments used by it are now alive, and any call instruction return + // value is alive as well. + if (LiveRetVal.count(RI->getParent()->getParent())) + MarkReturnInstArgumentLive(RI); + + } else { + CallSite CS = CallSite::get(I); + assert(CS.getInstruction() && "Unknown instruction for the I2I list!"); + + Function *Callee = CS.getCalledFunction(); + + // If we found a call or invoke instruction on this list, that means that + // an argument of the function is a call instruction. If the argument is + // live, then the return value of the called instruction is now live. + // + CallSite::arg_iterator AI = CS.arg_begin(); // ActualIterator + for (Function::arg_iterator FI = Callee->arg_begin(), + E = Callee->arg_end(); FI != E; ++AI, ++FI) { + // If this argument is another call... + CallSite ArgCS = CallSite::get(*AI); + if (ArgCS.getInstruction() && LiveArguments.count(FI)) + if (Function *Callee = ArgCS.getCalledFunction()) + MarkRetValLive(Callee); + } + } + } + + // Now we loop over all of the MaybeLive arguments, promoting them to be live + // arguments if one of the calls that uses the arguments to the calls they are + // passed into requires them to be live. Of course this could make other + // arguments live, so process callers recursively. 
+ // + // Because elements can be removed from the MaybeLiveArguments set, copy it to + // a temporary vector. + // + std::vector<Argument*> TmpArgList(MaybeLiveArguments.begin(), + MaybeLiveArguments.end()); + for (unsigned i = 0, e = TmpArgList.size(); i != e; ++i) { + Argument *MLA = TmpArgList[i]; + if (MaybeLiveArguments.count(MLA) && + isMaybeLiveArgumentNowLive(MLA)) + MarkArgumentLive(MLA); + } + + // Recover memory early... + CallSites.clear(); + + // At this point, we know that all arguments in DeadArguments and + // MaybeLiveArguments are dead. If the two sets are empty, there is nothing + // to do. + if (MaybeLiveArguments.empty() && DeadArguments.empty() && + MaybeLiveRetVal.empty() && DeadRetVal.empty()) + return false; + + // Otherwise, compact into one set, and start eliminating the arguments from + // the functions. + DeadArguments.insert(MaybeLiveArguments.begin(), MaybeLiveArguments.end()); + MaybeLiveArguments.clear(); + DeadRetVal.insert(MaybeLiveRetVal.begin(), MaybeLiveRetVal.end()); + MaybeLiveRetVal.clear(); + + LiveArguments.clear(); + LiveRetVal.clear(); + + NumArgumentsEliminated += DeadArguments.size(); + NumRetValsEliminated += DeadRetVal.size(); + while (!DeadArguments.empty()) + RemoveDeadArgumentsFromFunction((*DeadArguments.begin())->getParent()); + + while (!DeadRetVal.empty()) + RemoveDeadArgumentsFromFunction(*DeadRetVal.begin()); + return true; +} diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp new file mode 100644 index 0000000..87b725a --- /dev/null +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -0,0 +1,106 @@ +//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to clean up the output of GCC. It eliminates names for
+// types that are unused in the entire translation unit, using the
+// FindUsedTypes pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "deadtypeelim"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
+
+namespace {
+  struct VISIBILITY_HIDDEN DTE : public ModulePass {
+    static char ID; // Pass identification, replacement for typeid
+    DTE() : ModulePass((intptr_t)&ID) {}
+
+    // doPassInitialization - For this pass, it removes global symbol table
+    // entries for primitive types. These are never used for linking in GCC and
+    // they make the output uglier to look at, so we nuke them.
+    //
+    // Also, initialize instance variables.
+    //
+    bool runOnModule(Module &M);
+
+    // getAnalysisUsage - This function needs FindUsedTypes to do its job...
+    //
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<FindUsedTypes>();
+    }
+  };
+  char DTE::ID = 0;
+  RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+}
+
+ModulePass *llvm::createDeadTypeEliminationPass() {
+  return new DTE();
+}
+
+
+// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
+// should be eliminated.
+//
+static inline bool ShouldNukeSymtabEntry(const Type *Ty){
+  // Nuke all names for primitive types!
+  if (Ty->isPrimitiveType() || Ty->isInteger())
+    return true;
+
+  // Nuke all pointers to primitive types as well...
+ if (const PointerType *PT = dyn_cast<PointerType>(Ty)) + if (PT->getElementType()->isPrimitiveType() || + PT->getElementType()->isInteger()) + return true; + + return false; +} + +// run - For this pass, it removes global symbol table entries for primitive +// types. These are never used for linking in GCC and they make the output +// uglier to look at, so we nuke them. Also eliminate types that are never used +// in the entire program as indicated by FindUsedTypes. +// +bool DTE::runOnModule(Module &M) { + bool Changed = false; + + TypeSymbolTable &ST = M.getTypeSymbolTable(); + std::set<const Type *> UsedTypes = getAnalysis<FindUsedTypes>().getTypes(); + + // Check the symbol table for superfluous type entries... + // + // Grab the 'type' plane of the module symbol... + TypeSymbolTable::iterator TI = ST.begin(); + TypeSymbolTable::iterator TE = ST.end(); + while ( TI != TE ) { + // If this entry should be unconditionally removed, or if we detect that + // the type is not used, remove it. + const Type *RHS = TI->second; + if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) { + ST.remove(TI++); + ++NumKilled; + Changed = true; + } else { + ++TI; + // We only need to leave one name for each type. + UsedTypes.erase(RHS); + } + } + + return Changed; +} + +// vim: sw=2 diff --git a/lib/Transforms/IPO/ExtractFunction.cpp b/lib/Transforms/IPO/ExtractFunction.cpp new file mode 100644 index 0000000..8d6af41 --- /dev/null +++ b/lib/Transforms/IPO/ExtractFunction.cpp @@ -0,0 +1,144 @@ +//===-- ExtractFunction.cpp - Function extraction pass --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass extracts +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + /// @brief A pass to extract specific functions and their dependencies. + class VISIBILITY_HIDDEN FunctionExtractorPass : public ModulePass { + Function *Named; + bool deleteFunc; + bool reLink; + public: + static char ID; // Pass identification, replacement for typeid + + /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the + /// specified function. Otherwise, it deletes as much of the module as + /// possible, except for the function specified. + /// + FunctionExtractorPass(Function *F = 0, bool deleteFn = true, + bool relinkCallees = false) + : ModulePass((intptr_t)&ID), Named(F), deleteFunc(deleteFn), + reLink(relinkCallees) {} + + bool runOnModule(Module &M) { + if (Named == 0) { + Named = M.getFunction("main"); + if (Named == 0) return false; // No function to extract + } + + if (deleteFunc) + return deleteFunction(); + M.setModuleInlineAsm(""); + return isolateFunction(M); + } + + bool deleteFunction() { + // If we're in relinking mode, set linkage of all internal callees to + // external. 
This will allow us extract function, and then - link + // everything together + if (reLink) { + for (Function::iterator B = Named->begin(), BE = Named->end(); + B != BE; ++B) { + for (BasicBlock::iterator I = B->begin(), E = B->end(); + I != E; ++I) { + if (CallInst* callInst = dyn_cast<CallInst>(&*I)) { + Function* Callee = callInst->getCalledFunction(); + if (Callee && Callee->hasInternalLinkage()) + Callee->setLinkage(GlobalValue::ExternalLinkage); + } + } + } + } + + Named->setLinkage(GlobalValue::ExternalLinkage); + Named->deleteBody(); + assert(Named->isDeclaration() && "This didn't make the function external!"); + return true; + } + + bool isolateFunction(Module &M) { + // Make sure our result is globally accessible... + Named->setLinkage(GlobalValue::ExternalLinkage); + + // Mark all global variables internal + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + if (!I->isDeclaration()) { + I->setInitializer(0); // Make all variables external + I->setLinkage(GlobalValue::ExternalLinkage); + } + + // All of the functions may be used by global variables or the named + // function. Loop through them and create a new, external functions that + // can be "used", instead of ones with bodies. + std::vector<Function*> NewFunctions; + + Function *Last = --M.end(); // Figure out where the last real fn is. + + for (Module::iterator I = M.begin(); ; ++I) { + if (&*I != Named) { + Function *New = new Function(I->getFunctionType(), + GlobalValue::ExternalLinkage); + New->setCallingConv(I->getCallingConv()); + + // If it's not the named function, delete the body of the function + I->dropAllReferences(); + + M.getFunctionList().push_back(New); + NewFunctions.push_back(New); + New->takeName(I); + } + + if (&*I == Last) break; // Stop after processing the last function + } + + // Now that we have replacements all set up, loop through the module, + // deleting the old functions, replacing them with the newly created + // functions. 
+ if (!NewFunctions.empty()) { + unsigned FuncNum = 0; + Module::iterator I = M.begin(); + do { + if (&*I != Named) { + // Make everything that uses the old function use the new dummy fn + I->replaceAllUsesWith(NewFunctions[FuncNum++]); + + Function *Old = I; + ++I; // Move the iterator to the new function + + // Delete the old function! + M.getFunctionList().erase(Old); + + } else { + ++I; // Skip the function we are extracting + } + } while (&*I != NewFunctions[0]); + } + + return true; + } + }; + + char FunctionExtractorPass::ID = 0; + RegisterPass<FunctionExtractorPass> X("extract", "Function Extractor"); +} + +ModulePass *llvm::createFunctionExtractionPass(Function *F, bool deleteFn, + bool relinkCallees) { + return new FunctionExtractorPass(F, deleteFn, relinkCallees); +} diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp new file mode 100644 index 0000000..09cfa21 --- /dev/null +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -0,0 +1,203 @@ +//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transform is designed to eliminate unreachable internal globals from the +// program. It uses an aggressive algorithm, searching out globals that are +// known to be alive. After it finds all of the globals which are needed, it +// deletes whatever is left over. This allows it to delete recursive chunks of +// the program which are unreachable. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globaldce" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include <set> +using namespace llvm; + +STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumVariables, "Number of global variables removed"); + +namespace { + struct VISIBILITY_HIDDEN GlobalDCE : public ModulePass { + static char ID; // Pass identification, replacement for typeid + GlobalDCE() : ModulePass((intptr_t)&ID) {} + + // run - Do the GlobalDCE pass on the specified module, optionally updating + // the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M); + + private: + std::set<GlobalValue*> AliveGlobals; + + /// MarkGlobalIsNeeded - the specific global value as needed, and + /// recursively mark anything that it uses as also needed. + void GlobalIsNeeded(GlobalValue *GV); + void MarkUsedGlobalsAsNeeded(Constant *C); + + bool SafeToDestroyConstant(Constant* C); + bool RemoveUnusedGlobalValue(GlobalValue &GV); + }; + char GlobalDCE::ID = 0; + RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination"); +} + +ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } + +bool GlobalDCE::runOnModule(Module &M) { + bool Changed = false; + // Loop over the module, adding globals which are obviously necessary. 
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Functions with external linkage are needed if they have a body + if ((!I->hasInternalLinkage() && !I->hasLinkOnceLinkage()) && + !I->isDeclaration()) + GlobalIsNeeded(I); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + Changed |= RemoveUnusedGlobalValue(*I); + // Externally visible & appending globals are needed, if they have an + // initializer. + if ((!I->hasInternalLinkage() && !I->hasLinkOnceLinkage()) && + !I->isDeclaration()) + GlobalIsNeeded(I); + } + + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ++I) { + // Aliases are always needed even if they are not used. + MarkUsedGlobalsAsNeeded(I->getAliasee()); + } + + // Now that all globals which are needed are in the AliveGlobals set, we loop + // through the program, deleting those which are not alive. + // + + // The first pass is to drop initializers of global variables which are dead. + std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadGlobalVars.push_back(I); // Keep track of dead globals + I->setInitializer(0); + } + + + // The second pass drops the bodies of functions which are dead... + std::vector<Function*> DeadFunctions; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!AliveGlobals.count(I)) { + DeadFunctions.push_back(I); // Keep track of dead globals + if (!I->isDeclaration()) + I->deleteBody(); + } + + if (!DeadFunctions.empty()) { + // Now that all interreferences have been dropped, delete the actual objects + // themselves. 
+ for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadFunctions[i]); + M.getFunctionList().erase(DeadFunctions[i]); + } + NumFunctions += DeadFunctions.size(); + Changed = true; + } + + if (!DeadGlobalVars.empty()) { + for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { + RemoveUnusedGlobalValue(*DeadGlobalVars[i]); + M.getGlobalList().erase(DeadGlobalVars[i]); + } + NumVariables += DeadGlobalVars.size(); + Changed = true; + } + + // Make sure that all memory is released + AliveGlobals.clear(); + return Changed; +} + +/// MarkGlobalIsNeeded - the specific global value as needed, and +/// recursively mark anything that it uses as also needed. +void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { + std::set<GlobalValue*>::iterator I = AliveGlobals.lower_bound(G); + + // If the global is already in the set, no need to reprocess it. + if (I != AliveGlobals.end() && *I == G) return; + + // Otherwise insert it now, so we do not infinitely recurse + AliveGlobals.insert(I, G); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) { + // If this is a global variable, we must make sure to add any global values + // referenced by the initializer to the alive set. + if (GV->hasInitializer()) + MarkUsedGlobalsAsNeeded(GV->getInitializer()); + } else if (!isa<GlobalAlias>(G)) { + // Otherwise this must be a function object. We have to scan the body of + // the function looking for constants and global values which are used as + // operands. Any operands of these types must be processed to ensure that + // any globals used will be marked as needed. + Function *F = cast<Function>(G); + // For all basic blocks... + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + // For all instructions... + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + // For all operands... 
+ for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U) + if (GlobalValue *GV = dyn_cast<GlobalValue>(*U)) + GlobalIsNeeded(GV); + else if (Constant *C = dyn_cast<Constant>(*U)) + MarkUsedGlobalsAsNeeded(C); + } +} + +void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + GlobalIsNeeded(GV); + else { + // Loop over all of the operands of the constant, adding any globals they + // use to the list of needed globals. + for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) + MarkUsedGlobalsAsNeeded(cast<Constant>(*I)); + } +} + +// RemoveUnusedGlobalValue - Loop over all of the uses of the specified +// GlobalValue, looking for the constant pointer ref that may be pointing to it. +// If found, check to see if the constant pointer ref is safe to destroy, and if +// so, nuke it. This will reduce the reference count on the global value, which +// might make it deader. +// +bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { + if (GV.use_empty()) return false; + GV.removeDeadConstantUsers(); + return GV.use_empty(); +} + +// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used +// by constants itself. Note that constants cannot be cyclic, so this test is +// pretty easy to implement recursively. 
+// +bool GlobalDCE::SafeToDestroyConstant(Constant *C) { + for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I) + if (Constant *User = dyn_cast<Constant>(*I)) { + if (!SafeToDestroyConstant(User)) return false; + } else { + return false; + } + return true; +} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp new file mode 100644 index 0000000..520af87 --- /dev/null +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -0,0 +1,1988 @@ +//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms simple global variables that never have their address +// taken. If obviously true, it marks read/write globals as constant, deletes +// variables only stored to, etc. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalopt" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +STATISTIC(NumMarked , "Number of globals marked constant"); +STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); +STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); +STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); +STATISTIC(NumDeleted , "Number of globals deleted"); +STATISTIC(NumFnDeleted , "Number of functions deleted"); +STATISTIC(NumGlobUses , "Number of global uses devirtualized"); +STATISTIC(NumLocalized , "Number of globals localized"); +STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); +STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); +STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); + +namespace { + struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetData>(); + } + static char ID; // Pass identification, replacement for typeid + GlobalOpt() : ModulePass((intptr_t)&ID) {} + + bool runOnModule(Module &M); + + private: + GlobalVariable *FindGlobalCtors(Module &M); + bool OptimizeFunctions(Module &M); + bool OptimizeGlobalVars(Module &M); + bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); + bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); + }; + + char 
GlobalOpt::ID = 0; + RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); +} + +ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } + +/// GlobalStatus - As we analyze each global, keep track of some information +/// about it. If we find out that the address of the global is taken, none of +/// this info will be accurate. +struct VISIBILITY_HIDDEN GlobalStatus { + /// isLoaded - True if the global is ever loaded. If the global isn't ever + /// loaded it can be deleted. + bool isLoaded; + + /// StoredType - Keep track of what stores to the global look like. + /// + enum StoredType { + /// NotStored - There is no store to this global. It can thus be marked + /// constant. + NotStored, + + /// isInitializerStored - This global is stored to, but the only thing + /// stored is the constant it was initialized with. This is only tracked + /// for scalar globals. + isInitializerStored, + + /// isStoredOnce - This global is stored to, but only its initializer and + /// one other value is ever stored to it. If this global isStoredOnce, we + /// track the value stored to it in StoredOnceValue below. This is only + /// tracked for scalar globals. + isStoredOnce, + + /// isStored - This global is stored to by multiple values or something else + /// that we cannot track. + isStored + } StoredType; + + /// StoredOnceValue - If only one value (besides the initializer constant) is + /// ever stored to this global, keep track of what value it is. + Value *StoredOnceValue; + + /// AccessingFunction/HasMultipleAccessingFunctions - These start out + /// null/false. When the first accessing function is noticed, it is recorded. + /// When a second different accessing function is noticed, + /// HasMultipleAccessingFunctions is set to true. + Function *AccessingFunction; + bool HasMultipleAccessingFunctions; + + /// HasNonInstructionUser - Set to true if this global has a user that is not + /// an instruction (e.g. a constant expr or GV initializer). 
+ bool HasNonInstructionUser; + + /// HasPHIUser - Set to true if this global has a user that is a PHI node. + bool HasPHIUser; + + /// isNotSuitableForSRA - Keep track of whether any SRA preventing users of + /// the global exist. Such users include GEP instruction with variable + /// indexes, and non-gep/load/store users like constant expr casts. + bool isNotSuitableForSRA; + + GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), + AccessingFunction(0), HasMultipleAccessingFunctions(false), + HasNonInstructionUser(false), HasPHIUser(false), + isNotSuitableForSRA(false) {} +}; + + + +/// ConstantIsDead - Return true if the specified constant is (transitively) +/// dead. The constant may be used by other constants (e.g. constant arrays and +/// constant exprs) as long as they are dead, but it cannot be used by anything +/// else. +static bool ConstantIsDead(Constant *C) { + if (isa<GlobalValue>(C)) return false; + + for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + if (Constant *CU = dyn_cast<Constant>(*UI)) { + if (!ConstantIsDead(CU)) return false; + } else + return false; + return true; +} + + +/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus +/// structure. If the global has its address taken, return true to indicate we +/// can't do anything with it. +/// +static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, + std::set<PHINode*> &PHIUsers) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + GS.HasNonInstructionUser = true; + + if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; + if (CE->getOpcode() != Instruction::GetElementPtr) + GS.isNotSuitableForSRA = true; + else if (!GS.isNotSuitableForSRA) { + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. 
+ if (CE->getNumOperands() < 3 || !CE->getOperand(1)->isNullValue()) + GS.isNotSuitableForSRA = true; + else { + for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(CE->getOperand(i))) { + GS.isNotSuitableForSRA = true; + break; + } + } + } + + } else if (Instruction *I = dyn_cast<Instruction>(*UI)) { + if (!GS.HasMultipleAccessingFunctions) { + Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (isa<LoadInst>(I)) { + GS.isLoaded = true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) return true; + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::isStored) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ + Value *StoredVal = SI->getOperand(0); + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + // G = G + if (GS.StoredType < GlobalStatus::isInitializerStored) + GS.StoredType = GlobalStatus::isInitializerStored; + } else if (GS.StoredType < GlobalStatus::isStoredOnce) { + GS.StoredType = GlobalStatus::isStoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::isStoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::isStored; + } + } else { + GS.StoredType = GlobalStatus::isStored; + } + } else if (isa<GetElementPtrInst>(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + + // If the first two indices are constants, this can be SRA'd. 
+ if (isa<GlobalVariable>(I->getOperand(0))) { + if (I->getNumOperands() < 3 || !isa<Constant>(I->getOperand(1)) || + !cast<Constant>(I->getOperand(1))->isNullValue() || + !isa<ConstantInt>(I->getOperand(2))) + GS.isNotSuitableForSRA = true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I->getOperand(0))){ + if (CE->getOpcode() != Instruction::GetElementPtr || + CE->getNumOperands() < 3 || I->getNumOperands() < 2 || + !isa<Constant>(I->getOperand(0)) || + !cast<Constant>(I->getOperand(0))->isNullValue()) + GS.isNotSuitableForSRA = true; + } else { + GS.isNotSuitableForSRA = true; + } + } else if (isa<SelectInst>(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + GS.isNotSuitableForSRA = true; + } else if (PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PHIUsers.insert(PN).second) // Not already visited. + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + GS.isNotSuitableForSRA = true; + GS.HasPHIUser = true; + } else if (isa<CmpInst>(I)) { + GS.isNotSuitableForSRA = true; + } else if (isa<MemCpyInst>(I) || isa<MemMoveInst>(I)) { + if (I->getOperand(1) == V) + GS.StoredType = GlobalStatus::isStored; + if (I->getOperand(2) == V) + GS.isLoaded = true; + GS.isNotSuitableForSRA = true; + } else if (isa<MemSetInst>(I)) { + assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + GS.StoredType = GlobalStatus::isStored; + GS.isNotSuitableForSRA = true; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (Constant *C = dyn_cast<Constant>(*UI)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!ConstantIsDead(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. 
+ return true; + } + + return false; +} + +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { + ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + if (!CI) return 0; + unsigned IdxV = CI->getZExtValue(); + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { + if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); + } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { + if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); + } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { + if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); + } else if (isa<ConstantAggregateZero>(Agg)) { + if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return Constant::getNullValue(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast<SequentialType>(Agg->getType())) { + return Constant::getNullValue(STy->getElementType()); + } + } else if (isa<UndefValue>(Agg)) { + if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { + if (IdxV < STy->getNumElements()) + return UndefValue::get(STy->getElementType(IdxV)); + } else if (const SequentialType *STy = + dyn_cast<SequentialType>(Agg->getType())) { + return UndefValue::get(STy->getElementType()); + } + } + return 0; +} + + +/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all +/// users of the global, cleaning up the obvious ones. This is largely just a +/// quick scan over the use list to clean up the easy and obvious cruft. This +/// returns true if it made a change. +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { + User *U = *UI++; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (Init) { + // Replace the load with the initializer. 
+ LI->replaceAllUsesWith(Init); + LI->eraseFromParent(); + Changed = true; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Store must be unreachable or storing Init into the global. + SI->eraseFromParent(); + Changed = true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Constant *SubInit = 0; + if (Init) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(CE, SubInit); + } else if (CE->getOpcode() == Instruction::BitCast && + isa<PointerType>(CE->getType())) { + // Pointer cast, delete any stores and memsets to the global. + Changed |= CleanupConstantGlobalUsers(CE, 0); + } + + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + Constant *SubInit = 0; + ConstantExpr *CE = + dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); + if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) + SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + + if (GEP->use_empty()) { + GEP->eraseFromParent(); + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv + if (MI->getRawDest() == V) { + MI->eraseFromParent(); + Changed = true; + } + + } else if (Constant *C = dyn_cast<Constant>(U)) { + // If we have a chain of dead constantexprs or other things dangling from + // us, and if they are all dead, nuke them without remorse. + if (ConstantIsDead(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + CleanupConstantGlobalUsers(V, Init); + return true; + } + } + } + return Changed; +} + +/// SRAGlobal - Perform scalar replacement of aggregates on the specified global +/// variable. This opens the door for other optimizations by exposing the +/// behavior of the program in a more fine-grained way. 
We have determined that +/// this transformation is safe already. We return the first global variable we +/// insert so that the caller can reprocess it. +static GlobalVariable *SRAGlobal(GlobalVariable *GV) { + assert(GV->hasInternalLinkage() && !GV->isConstant()); + Constant *Init = GV->getInitializer(); + const Type *Ty = Init->getType(); + + std::vector<GlobalVariable*> NewGlobals; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + NewGlobals.reserve(STy->getNumElements()); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::Int32Ty, i)); + assert(In && "Couldn't get element of initializer?"); + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+utostr(i), + (Module *)NULL, + GV->isThreadLocal()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + } + } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + unsigned NumElements = 0; + if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) + NumElements = ATy->getNumElements(); + else if (const VectorType *PTy = dyn_cast<VectorType>(STy)) + NumElements = PTy->getNumElements(); + else + assert(0 && "Unknown aggregate sequential type!"); + + if (NumElements > 16 && GV->hasNUsesOrMore(16)) + return 0; // It's not worth it. 
+ NewGlobals.reserve(NumElements); + for (unsigned i = 0, e = NumElements; i != e; ++i) { + Constant *In = getAggregateConstantElement(Init, + ConstantInt::get(Type::Int32Ty, i)); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable::InternalLinkage, + In, GV->getName()+"."+utostr(i), + (Module *)NULL, + GV->isThreadLocal()); + Globals.insert(GV, NGV); + NewGlobals.push_back(NGV); + } + } + + if (NewGlobals.empty()) + return 0; + + DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; + + Constant *NullInt = Constant::getNullValue(Type::Int32Ty); + + // Loop over all of the uses of the global, replacing the constantexpr geps, + // with smaller constantexpr geps or direct references. + while (!GV->use_empty()) { + User *GEP = GV->use_back(); + assert(((isa<ConstantExpr>(GEP) && + cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| + isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); + if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + + Value *NewPtr = NewGlobals[Val]; + + // Form a shorter GEP if needed. 
+ if (GEP->getNumOperands() > 3) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { + SmallVector<Constant*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) + Idxs.push_back(CE->getOperand(i)); + NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), + &Idxs[0], Idxs.size()); + } else { + GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); + SmallVector<Value*, 8> Idxs; + Idxs.push_back(NullInt); + for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) + Idxs.push_back(GEPI->getOperand(i)); + NewPtr = new GetElementPtrInst(NewPtr, &Idxs[0], Idxs.size(), + GEPI->getName()+"."+utostr(Val), GEPI); + } + GEP->replaceAllUsesWith(NewPtr); + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) + GEPI->eraseFromParent(); + else + cast<ConstantExpr>(GEP)->destroyConstant(); + } + + // Delete the old global, now that it is dead. + Globals.erase(GV); + ++NumSRA; + + // Loop over the new globals array deleting any globals that are obviously + // dead. This can arise due to scalarization of a structure or an array that + // has elements that are dead. + unsigned FirstGlobal = 0; + for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) + if (NewGlobals[i]->use_empty()) { + Globals.erase(NewGlobals[i]); + if (FirstGlobal == i) ++FirstGlobal; + } + + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; +} + +/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified +/// value will trap if the value is dynamically null. +static bool AllUsesOfValueWillTrapIfNull(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (isa<LoadInst>(*UI)) { + // Will trap. + } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (SI->getOperand(0) == V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Storing the value. 
+ } + } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + if (CI->getOperand(0) != V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Not calling the ptr + } + } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + if (II->getOperand(0) != V) { + //cerr << "NONTRAPPING USE: " << **UI; + return false; // Not calling the ptr + } + } else if (CastInst *CI = dyn_cast<CastInst>(*UI)) { + if (!AllUsesOfValueWillTrapIfNull(CI)) return false; + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { + if (!AllUsesOfValueWillTrapIfNull(GEPI)) return false; + } else if (isa<ICmpInst>(*UI) && + isa<ConstantPointerNull>(UI->getOperand(1))) { + // Ignore setcc X, null + } else { + //cerr << "NONTRAPPING USE: " << **UI; + return false; + } + return true; +} + +/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads +/// from GV will trap if the loaded value is null. Note that this also permits +/// comparisons of the loaded value against null, as a special case. +static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (!AllUsesOfValueWillTrapIfNull(LI)) + return false; + } else if (isa<StoreInst>(*UI)) { + // Ignore stores to the global. + } else { + // We don't know or understand this user, bail out. 
+ //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + return false; + } + + return true; +} + +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { + bool Changed = false; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { + Instruction *I = cast<Instruction>(*UI++); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + LI->setOperand(0, NewV); + Changed = true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (SI->getOperand(1) == V) { + SI->setOperand(1, NewV); + Changed = true; + } + } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + if (I->getOperand(0) == V) { + // Calling through the pointer! Turn into a direct call, but be careful + // that the pointer is not also being passed as an argument. + I->setOperand(0, NewV); + Changed = true; + bool PassedAsArg = false; + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i) == V) { + PassedAsArg = true; + I->setOperand(i, NewV); + } + + if (PassedAsArg) { + // Being passed as an argument also. Be careful to not invalidate UI! + UI = V->use_begin(); + } + } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + Changed |= OptimizeAwayTrappingUsesOfValue(CI, + ConstantExpr::getCast(CI->getOpcode(), + NewV, CI->getType())); + if (CI->use_empty()) { + Changed = true; + CI->eraseFromParent(); + } + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + // Should handle GEP here. 
+ SmallVector<Constant*, 8> Idxs; + Idxs.reserve(GEPI->getNumOperands()-1); + for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i) + if (Constant *C = dyn_cast<Constant>(GEPI->getOperand(i))) + Idxs.push_back(C); + else + break; + if (Idxs.size() == GEPI->getNumOperands()-1) + Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, + ConstantExpr::getGetElementPtr(NewV, &Idxs[0], + Idxs.size())); + if (GEPI->use_empty()) { + Changed = true; + GEPI->eraseFromParent(); + } + } + } + + return Changed; +} + + +/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null +/// value stored into it. If there are uses of the loaded value that would trap +/// if the loaded value is dynamically null, then we know that they cannot be +/// reachable with a null optimize away the load. +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { + std::vector<LoadInst*> Loads; + bool Changed = false; + + // Replace all uses of loads with uses of uses of the stored value. + for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); + GUI != E; ++GUI) + if (LoadInst *LI = dyn_cast<LoadInst>(*GUI)) { + Loads.push_back(LI); + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); + } else { + // If we get here we could have stores, selects, or phi nodes whose values + // are loaded. + assert((isa<StoreInst>(*GUI) || isa<PHINode>(*GUI) || + isa<SelectInst>(*GUI)) && + "Only expect load and stores!"); + } + + if (Changed) { + DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV; + ++NumGlobUses; + } + + // Delete all of the loads we can, keeping track of whether we nuked them all! + bool AllLoadsGone = true; + while (!Loads.empty()) { + LoadInst *L = Loads.back(); + if (L->use_empty()) { + L->eraseFromParent(); + Changed = true; + } else { + AllLoadsGone = false; + } + Loads.pop_back(); + } + + // If we nuked all of the loads, then none of the stores are needed either, + // nor is the global. 
+ if (AllLoadsGone) { + DOUT << " *** GLOBAL NOW DEAD!\n"; + CleanupConstantGlobalUsers(GV, 0); + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + } + Changed = true; + } + return Changed; +} + +/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the +/// instructions that are foldable. +static void ConstantPropUsersOf(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) + if (Instruction *I = dyn_cast<Instruction>(*UI++)) + if (Constant *NewC = ConstantFoldInstruction(I)) { + I->replaceAllUsesWith(NewC); + + // Advance UI to the next non-I use to avoid invalidating it! + // Instructions could multiply use V. + while (UI != E && *UI == I) + ++UI; + I->eraseFromParent(); + } +} + +/// OptimizeGlobalAddressOfMalloc - This function takes the specified global +/// variable, and transforms the program as if it always contained the result of +/// the specified malloc. Because it is always the result of the specified +/// malloc, there is no reason to actually DO the malloc. Instead, turn the +/// malloc into a global, and any loads of GV as uses of the new global. +static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, + MallocInst *MI) { + DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI; + ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); + + if (NElements->getZExtValue() != 1) { + // If we have an array allocation, transform it to a single element + // allocation to make the code below simpler. 
+ Type *NewTy = ArrayType::get(MI->getAllocatedType(), + NElements->getZExtValue()); + MallocInst *NewMI = + new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty), + MI->getAlignment(), MI->getName(), MI); + Value* Indices[2]; + Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty); + Value *NewGEP = new GetElementPtrInst(NewMI, Indices, 2, + NewMI->getName()+".el0", MI); + MI->replaceAllUsesWith(NewGEP); + MI->eraseFromParent(); + MI = NewMI; + } + + // Create the new global variable. The contents of the malloc'd memory is + // undefined, so initialize with an undef value. + Constant *Init = UndefValue::get(MI->getAllocatedType()); + GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, + GlobalValue::InternalLinkage, Init, + GV->getName()+".body", + (Module *)NULL, + GV->isThreadLocal()); + GV->getParent()->getGlobalList().insert(GV, NewGV); + + // Anything that used the malloc now uses the global directly. + MI->replaceAllUsesWith(NewGV); + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, + ConstantInt::getFalse(), GV->getName()+".init", + (Module *)NULL, GV->isThreadLocal()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + std::vector<StoreInst*> Stores; + while (!GV->use_empty()) + if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa<ICmpInst>(LoadUse.getUser())) + LoadUse = RepValue; + else { + ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. 
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); + InitBoolUsed = true; + switch (CI->getPredicate()) { + default: assert(0 && "Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + LV = ConstantInt::getFalse(); // X < null -> always false + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::createNot(LV, "notinit", CI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + CI->replaceAllUsesWith(LV); + CI->eraseFromParent(); + } + } + LI->eraseFromParent(); + } else { + StoreInst *SI = cast<StoreInst>(GV->use_back()); + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(), InitBool, SI); + SI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast<Instruction>(InitBool->use_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV, InitBool); + + + // Now the GV is dead, nuke it and the malloc. + GV->eraseFromParent(); + MI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue); + + return NewGV; +} + +/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking +/// to make sure that there are no complex uses of V. We permit simple things +/// like dereferencing the pointer, but not storing through the address, unless +/// it is to the specified global. 
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, + GlobalVariable *GV) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI) + if (isa<LoadInst>(*UI) || isa<CmpInst>(*UI)) { + // Fine, ignore. + } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (SI->getOperand(0) == V && SI->getOperand(1) != GV) + return false; // Storing the pointer itself... bad. + // Otherwise, storing through it, or storing into GV... fine. + } else if (isa<GetElementPtrInst>(*UI) || isa<SelectInst>(*UI)) { + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(cast<Instruction>(*UI),GV)) + return false; + } else { + return false; + } + return true; +} + +/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV +/// somewhere. Transform all uses of the allocation into loads from the +/// global and uses of the resultant pointer. Further, delete the store into +/// GV. This assumes that these value pass the +/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. +static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, + GlobalVariable *GV) { + while (!Alloc->use_empty()) { + Instruction *U = Alloc->use_back(); + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // If this is the store of the allocation into the global, remove it. + if (SI->getOperand(1) == GV) { + SI->eraseFromParent(); + continue; + } + } + + // Insert a load from the global, and use it instead of the malloc. + Value *NL = new LoadInst(GV, GV->getName()+".val", U); + U->replaceUsesOfWith(Alloc, NL); + } +} + +/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from +/// GV are simple enough to perform HeapSRA, return true. 
+static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; + ++UI) + if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + // We permit two users of the load: setcc comparing against the null + // pointer, and a getelementptr of a specific form. + for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E; + ++UI) { + // Comparison against null is ok. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return false; + continue; + } + + // getelementptr is also ok, but only a simple form. + GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI); + if (!GEPI) return false; + + // Must index into the array and into the struct. + if (GEPI->getNumOperands() < 3) + return false; + + // Otherwise the GEP is ok. + continue; + } + } + return true; +} + +/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr +/// is a value loaded from the global. Eliminate all uses of Ptr, making them +/// use FieldGlobals instead. All uses of loaded values satisfy +/// GlobalLoadUsesSimpleEnoughForHeapSRA. +static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Ptr, + const std::vector<GlobalVariable*> &FieldGlobals) { + std::vector<Value *> InsertedLoadsForPtr; + //InsertedLoadsForPtr.resize(FieldGlobals.size()); + while (!Ptr->use_empty()) { + Instruction *User = Ptr->use_back(); + + // If this is a comparison against null, handle it. + if (ICmpInst *SCI = dyn_cast<ICmpInst>(User)) { + assert(isa<ConstantPointerNull>(SCI->getOperand(1))); + // If we have a setcc of the loaded pointer, we can use a setcc of any + // field. 
+ Value *NPtr; + if (InsertedLoadsForPtr.empty()) { + NPtr = new LoadInst(FieldGlobals[0], Ptr->getName()+".f0", Ptr); + InsertedLoadsForPtr.push_back(Ptr); + } else { + NPtr = InsertedLoadsForPtr.back(); + } + + Value *New = new ICmpInst(SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName(), SCI); + SCI->replaceAllUsesWith(New); + SCI->eraseFromParent(); + continue; + } + + // Otherwise, this should be: 'getelementptr Ptr, Idx, uint FieldNo ...' + GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); + assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) + && "Unexpected GEPI!"); + + // Load the pointer for this field. + unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); + if (InsertedLoadsForPtr.size() <= FieldNo) + InsertedLoadsForPtr.resize(FieldNo+1); + if (InsertedLoadsForPtr[FieldNo] == 0) + InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo], + Ptr->getName()+".f" + + utostr(FieldNo), Ptr); + Value *NewPtr = InsertedLoadsForPtr[FieldNo]; + + // Create the new GEP idx vector. + SmallVector<Value*, 8> GEPIdx; + GEPIdx.push_back(GEPI->getOperand(1)); + GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); + + Value *NGEPI = new GetElementPtrInst(NewPtr, &GEPIdx[0], GEPIdx.size(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NGEPI); + GEPI->eraseFromParent(); + } +} + +/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ + DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI; + const StructType *STy = cast<StructType>(MI->getAllocatedType()); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. 
+ ReplaceUsesOfMallocWithGlobal(MI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as MI, and N globals. + std::vector<GlobalVariable*> FieldGlobals; + std::vector<MallocInst*> FieldMallocs; + + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + const Type *FieldTy = STy->getElementType(FieldNo); + const Type *PFieldTy = PointerType::get(FieldTy); + + GlobalVariable *NGV = + new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), + GV->getName() + ".f" + utostr(FieldNo), GV, + GV->isThreadLocal()); + FieldGlobals.push_back(NGV); + + MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), + MI->getName() + ".f" + utostr(FieldNo),MI); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, MI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + Value *RunningOr = 0; + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull", MI); + if (!RunningOr) + RunningOr = Cond; // First seteq + else + RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = MI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); + + // Create the block to check the first condition. 
Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType()), + "tmp", NullPtrBlock); + BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent()); + BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent()); + new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock); + + // Fill in FreeBlock. + new FreeInst(GVVal, FreeBlock); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + new BranchInst(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + new BranchInst(ContBB, NullPtrBlock); + + + // MI is no longer needed, remove it. + MI->eraseFromParent(); + + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + while (!GV->use_empty()) { + if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { + RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals); + LI->eraseFromParent(); + } else { + // Must be a store of null. + StoreInst *SI = cast<StoreInst>(GV->use_back()); + assert(isa<Constant>(SI->getOperand(0)) && + cast<Constant>(SI->getOperand(0))->isNullValue() && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. 
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Constant *Null = + Constant::getNullValue(FieldGlobals[i]->getType()->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + } + + // The old global is now dead, remove it. + GV->eraseFromParent(); + + ++NumHeapSRA; + return FieldGlobals[0]; +} + + +// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge +// that only one value (besides its initializer) is ever stored to the global. +static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, + Module::global_iterator &GVI, + TargetData &TD) { + if (CastInst *CI = dyn_cast<CastInst>(StoredOnceVal)) + StoredOnceVal = CI->getOperand(0); + else if (GetElementPtrInst *GEPI =dyn_cast<GetElementPtrInst>(StoredOnceVal)){ + // "getelementptr Ptr, 0, 0, 0" is really just a cast. + bool IsJustACast = true; + for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i) + if (!isa<Constant>(GEPI->getOperand(i)) || + !cast<Constant>(GEPI->getOperand(i))->isNullValue()) { + IsJustACast = false; + break; + } + if (IsJustACast) + StoredOnceVal = GEPI->getOperand(0); + } + + // If we are dealing with a pointer global that is initialized to null and + // only has one (non-null) value stored into it, then we can optimize any + // users of the loaded value (often calls and loads) that would trap if the + // value was null. + if (isa<PointerType>(GV->getInitializer()->getType()) && + GV->getInitializer()->isNullValue()) { + if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { + if (GV->getInitializer()->getType() != SOVC->getType()) + SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + + // Optimize away any trapping uses of the loaded value. 
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) + return true; + } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { + // If this is a malloc of an abstract type, don't touch it. + if (!MI->getAllocatedType()->isSized()) + return false; + + // We can't optimize this global unless all uses of it are *known* to be + // of the malloc value, not of the null initializer value (consider a use + // that compares the global's value against zero to see if the malloc has + // been reached). To do this, we check to see if all uses of the global + // would trap if the global were null: this proves that they must all + // happen after the malloc. + if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) + return false; + + // We can't optimize this if the malloc itself is used in a complex way, + // for example, being stored into multiple globals. This allows the + // malloc to be stored into the specified global, loaded setcc'd, and + // GEP'd. These are all things we could transform to using the global + // for. + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) + return false; + + + // If we have a global that is only initialized with a fixed size malloc, + // transform the program to use global memory instead of malloc'd memory. + // This eliminates dynamic allocation, avoids an indirection accessing the + // data, and exposes the resultant global to further GlobalOpt. + if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue()* + TD.getTypeSize(MI->getAllocatedType()) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, MI); + return true; + } + } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. 
+ if (const StructType *AllocTy = + dyn_cast<StructType>(MI->getAllocatedType())) { + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 && + GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) { + GVI = PerformHeapAllocSRoA(GV, MI); + return true; + } + } + } + } + + return false; +} + +/// ShrinkGlobalToBoolean - At this point, we have learned that the only two +/// values ever stored into GV are its initializer and OtherVal. +static void ShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { + // Create the new global, initializing it to false. + GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false, + GlobalValue::InternalLinkage, ConstantInt::getFalse(), + GV->getName()+".b", + (Module *)NULL, + GV->isThreadLocal()); + GV->getParent()->getGlobalList().insert(GV, NewGV); + + Constant *InitVal = GV->getInitializer(); + assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!"); + + // If initialized to zero and storing one into the global, we can use a cast + // instead of a select to synthesize the desired value. + bool IsOneZero = false; + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) + IsOneZero = InitVal->isNullValue() && CI->isOne(); + + while (!GV->use_empty()) { + Instruction *UI = cast<Instruction>(GV->use_back()); + if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { + // Change the store into a boolean store. + bool StoringOther = SI->getOperand(0) == OtherVal; + // Only do this if we weren't storing a loaded value. + Value *StoreVal; + if (StoringOther || SI->getOperand(0) == InitVal) + StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther); + else { + // Otherwise, we are storing a previously loaded copy. To do this, + // change the copy from copying the original value to just copying the + // bool. 
+ Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); + + // If we're already replaced the input, StoredVal will be a cast or + // select instruction. If not, it will be a load of the original + // global. + if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { + assert(LI->getOperand(0) == GV && "Not a copy!"); + // Insert a new load, to preserve the saved value. + StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); + } else { + assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && + "This is not a form that we understand!"); + StoreVal = StoredVal->getOperand(0); + assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); + } + } + new StoreInst(StoreVal, NewGV, SI); + } else if (!UI->use_empty()) { + // Change the load into a load of bool then a select. + LoadInst *LI = cast<LoadInst>(UI); + LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); + Value *NSI; + if (IsOneZero) + NSI = new ZExtInst(NLI, LI->getType(), "", LI); + else + NSI = new SelectInst(NLI, OtherVal, InitVal, "", LI); + NSI->takeName(LI); + LI->replaceAllUsesWith(NSI); + } + UI->eraseFromParent(); + } + + GV->eraseFromParent(); +} + + +/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// it if possible. If we make a change, return true. 
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, + Module::global_iterator &GVI) { + std::set<PHINode*> PHIUsers; + GlobalStatus GS; + GV->removeDeadConstantUsers(); + + if (GV->use_empty()) { + DOUT << "GLOBAL DEAD: " << *GV; + GV->eraseFromParent(); + ++NumDeleted; + return true; + } + + if (!AnalyzeGlobal(GV, GS, PHIUsers)) { +#if 0 + cerr << "Global: " << *GV; + cerr << " isLoaded = " << GS.isLoaded << "\n"; + cerr << " StoredType = "; + switch (GS.StoredType) { + case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break; + case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break; + case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break; + case GlobalStatus::isStored: cerr << "stored\n"; break; + } + if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) + cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"; + if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) + cerr << " AccessingFunction = " << GS.AccessingFunction->getName() + << "\n"; + cerr << " HasMultipleAccessingFunctions = " + << GS.HasMultipleAccessingFunctions << "\n"; + cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"; + cerr << " isNotSuitableForSRA = " << GS.isNotSuitableForSRA << "\n"; + cerr << "\n"; +#endif + + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive we can make + // this global a local variable) we replace the global with a local alloca + // in this function. + // + // NOTE: It doesn't make sense to promote non first class types since we + // are just replacing static memory to stack memory. 
+ if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isFirstClassType() && + GS.AccessingFunction->getName() == "main" && + GS.AccessingFunction->hasExternalLinkage()) { + DOUT << "LOCALIZING GLOBAL: " << *GV; + Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); + const Type* ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI); + if (!isa<UndefValue>(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } + + // If the global is never loaded (but may be stored to), it is dead. + // Delete it now. + if (!GS.isLoaded) { + DOUT << "GLOBAL NEVER LOADED: " << *GV; + + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, delete it. + if (GV->use_empty()) { + GV->eraseFromParent(); + ++NumDeleted; + Changed = true; + } + return Changed; + + } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + DOUT << "MARKING CONSTANT: " << *GV; + GV->setConstant(true); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + // If the global is dead now, just nuke it. + if (GV->use_empty()) { + DOUT << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"; + GV->eraseFromParent(); + ++NumDeleted; + } + + ++NumMarked; + return true; + } else if (!GS.isNotSuitableForSRA && + !GV->getInitializer()->getType()->isFirstClassType()) { + if (GlobalVariable *FirstNewGV = SRAGlobal(GV)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! 
+ return true; + } + } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + // If the initial value for the global was an undef value, and if only + // one other value was stored into it, we can just change the + // initializer to be an undef value, then delete all stores to the + // global. This allows us to mark it constant. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (isa<UndefValue>(GV->getInitializer())) { + // Change the initial value here. + GV->setInitializer(SOVConstant); + + // Clean up any obviously simplifiable users now. + CleanupConstantGlobalUsers(GV, GV->getInitializer()); + + if (GV->use_empty()) { + DOUT << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"; + GV->eraseFromParent(); + ++NumDeleted; + } else { + GVI = GV; + } + ++NumSubstitute; + return true; + } + + // Try to optimize globals based on the knowledge that only one value + // (besides its initializer) is ever stored to the global. + if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, + getAnalysis<TargetData>())) + return true; + + // Otherwise, if the global was not a boolean, we can shrink it to be a + // boolean. + if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) + if (GV->getType()->getElementType() != Type::Int1Ty && + !GV->getType()->getElementType()->isFloatingPoint() && + !isa<VectorType>(GV->getType()->getElementType()) && + !GS.HasPHIUser && !GS.isNotSuitableForSRA) { + DOUT << " *** SHRINKING TO BOOL: " << *GV; + ShrinkGlobalToBoolean(GV, SOVConstant); + ++NumShrunkToBool; + return true; + } + } + } + return false; +} + +/// OnlyCalledDirectly - Return true if the specified function is only called +/// directly. In other words, its address is never taken. 
+static bool OnlyCalledDirectly(Function *F) { + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + Instruction *User = dyn_cast<Instruction>(*UI); + if (!User) return false; + if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false; + + // See if the function address is passed as an argument. + for (unsigned i = 1, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == F) return false; + } + return true; +} + +/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified +/// function, changing them to FastCC. +static void ChangeCalleesToFastCall(Function *F) { + for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + Instruction *User = cast<Instruction>(*UI); + if (CallInst *CI = dyn_cast<CallInst>(User)) + CI->setCallingConv(CallingConv::Fast); + else + cast<InvokeInst>(User)->setCallingConv(CallingConv::Fast); + } +} + +bool GlobalOpt::OptimizeFunctions(Module &M) { + bool Changed = false; + // Optimize functions. + for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { + Function *F = FI++; + F->removeDeadConstantUsers(); + if (F->use_empty() && (F->hasInternalLinkage() || + F->hasLinkOnceLinkage())) { + M.getFunctionList().erase(F); + Changed = true; + ++NumFnDeleted; + } else if (F->hasInternalLinkage() && + F->getCallingConv() == CallingConv::C && !F->isVarArg() && + OnlyCalledDirectly(F)) { + // If this function has C calling conventions, is not a varargs + // function, and is only called directly, promote it to use the Fast + // calling convention. 
+ F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + } + return Changed; +} + +bool GlobalOpt::OptimizeGlobalVars(Module &M) { + bool Changed = false; + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ) { + GlobalVariable *GV = GVI++; + if (!GV->isConstant() && GV->hasInternalLinkage() && + GV->hasInitializer()) + Changed |= ProcessInternalGlobal(GV, GVI); + } + return Changed; +} + +/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// initializers have an init priority of 65535. +GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->getName() == "llvm.global_ctors") { + // Found it, verify it's an array of { int, void()* }. + const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); + if (!ATy) return 0; + const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); + if (!STy || STy->getNumElements() != 2 || + STy->getElementType(0) != Type::Int32Ty) return 0; + const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); + if (!PFTy) return 0; + const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); + if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || + FTy->getNumParams() != 0) + return 0; + + // Verify that the initializer is simple enough for us to handle. + if (!I->hasInitializer()) return 0; + ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); + if (!CA) return 0; + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(i))) { + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return 0; + + // Init priority must be standard. 
+ ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!CI || CI->getZExtValue() != 65535) + return 0; + } else { + return 0; + } + + return I; + } + return 0; +} + +/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function*> Result; + Result.reserve(CA->getNumOperands()); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + ConstantStruct *CS = cast<ConstantStruct>(CA->getOperand(i)); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the +/// specified array, returning the new global to use. +static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, + const std::vector<Function*> &Ctors) { + // If we made a change, reassemble the initializer list. + std::vector<Constant*> CSVals; + CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); + CSVals.push_back(0); + + // Create the new init list. + std::vector<Constant*> CAList; + for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { + if (Ctors[i]) { + CSVals[1] = Ctors[i]; + } else { + const Type *FTy = FunctionType::get(Type::VoidTy, + std::vector<const Type*>(), false); + const PointerType *PFTy = PointerType::get(FTy); + CSVals[1] = Constant::getNullValue(PFTy); + CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); + } + CAList.push_back(ConstantStruct::get(CSVals)); + } + + // Create the array initializer. + const Type *StructTy = + cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), + CAList); + + // If we didn't change the number of elements, don't create a new GV. 
+ if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return GCL; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), + GCL->getLinkage(), CA, "", + (Module *)NULL, + GCL->isThreadLocal()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. + if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); + + if (Ctors.size()) + return NGV; + else + return 0; +} + + +static Constant *getVal(std::map<Value*, Constant*> &ComputedValues, + Value *V) { + if (Constant *CV = dyn_cast<Constant>(V)) return CV; + Constant *R = ComputedValues[V]; + assert(R && "Reference to an uncomputed value!"); + return R; +} + +/// isSimpleEnoughPointerToCommit - Return true if this constant is simple +/// enough for us to understand. In particular, if it is a cast of something, +/// we punt. We basically just support direct accesses to globals and GEP's of +/// globals. This should be kept up to date with CommitValueTo. +static bool isSimpleEnoughPointerToCommit(Constant *C) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (!GV->hasExternalLinkage() && !GV->hasInternalLinkage()) + return false; // do not allow weak/linkonce/dllimport/dllexport linkage. + return !GV->isDeclaration(); // reject external globals. + } + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + // Handle a constantexpr gep. + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (!GV->hasExternalLinkage() && !GV->hasInternalLinkage()) + return false; // do not allow weak/linkonce/dllimport/dllexport linkage. 
+ return GV->hasInitializer() && + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + return false; +} + +/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global +/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. +/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, + ConstantExpr *Addr, unsigned OpNo) { + // Base case of the recursion. + if (OpNo == Addr->getNumOperands()) { + assert(Val->getType() == Init->getType() && "Type mismatch!"); + return Val; + } + + if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + std::vector<Constant*> Elts; + + // Break up the constant into its elements. + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) + Elts.push_back(CS->getOperand(i)); + } else if (isa<ConstantAggregateZero>(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(Constant::getNullValue(STy->getElementType(i))); + } else if (isa<UndefValue>(Init)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Elts.push_back(UndefValue::get(STy->getElementType(i))); + } else { + assert(0 && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + } + + // Replace the element that we are supposed to. + ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); + unsigned Idx = CU->getZExtValue(); + assert(Idx < STy->getNumElements() && "Struct index out of range!"); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + + // Return the modified struct. + return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); + } else { + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + const ArrayType *ATy = cast<ArrayType>(Init->getType()); + + // Break up the array into elements. 
+ std::vector<Constant*> Elts; + if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + Elts.push_back(CA->getOperand(i)); + } else if (isa<ConstantAggregateZero>(Init)) { + Constant *Elt = Constant::getNullValue(ATy->getElementType()); + Elts.assign(ATy->getNumElements(), Elt); + } else if (isa<UndefValue>(Init)) { + Constant *Elt = UndefValue::get(ATy->getElementType()); + Elts.assign(ATy->getNumElements(), Elt); + } else { + assert(0 && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + } + + assert(CI->getZExtValue() < ATy->getNumElements()); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + return ConstantArray::get(ATy, Elts); + } +} + +/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. +static void CommitValueTo(Constant *Val, Constant *Addr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + assert(GV->hasInitializer()); + GV->setInitializer(Val); + return; + } + + ConstantExpr *CE = cast<ConstantExpr>(Addr); + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + + Constant *Init = GV->getInitializer(); + Init = EvaluateStoreInto(Init, Val, CE, 2); + GV->setInitializer(Init); +} + +/// ComputeLoadResult - Return the value that would be computed by a load from +/// P after the stores reflected by 'memory' have been performed. If we can't +/// decide, return null. +static Constant *ComputeLoadResult(Constant *P, + const std::map<Constant*, Constant*> &Memory) { + // If this memory location has been recently stored, use the stored value: it + // is the most up-to-date. + std::map<Constant*, Constant*>::const_iterator I = Memory.find(P); + if (I != Memory.end()) return I->second; + + // Access it. 
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { + if (GV->hasInitializer()) + return GV->getInitializer(); + return 0; + } + + // Handle a constantexpr getelementptr. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) + if (CE->getOpcode() == Instruction::GetElementPtr && + isa<GlobalVariable>(CE->getOperand(0))) { + GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); + if (GV->hasInitializer()) + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + } + + return 0; // don't know how to evaluate. +} + +/// EvaluateFunction - Evaluate a call to function F, returning true if +/// successful, false if we can't evaluate it. ActualArgs contains the formal +/// arguments for the function. +static bool EvaluateFunction(Function *F, Constant *&RetVal, + const std::vector<Constant*> &ActualArgs, + std::vector<Function*> &CallStack, + std::map<Constant*, Constant*> &MutatedMemory, + std::vector<GlobalVariable*> &AllocaTmps) { + // Check to see if this function is already executing (recursion). If so, + // bail out. TODO: we might want to accept limited recursion. + if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) + return false; + + CallStack.push_back(F); + + /// Values - As we compute SSA register values, we store their contents here. + std::map<Value*, Constant*> Values; + + // Initialize arguments to the incoming values specified. + unsigned ArgNo = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; + ++AI, ++ArgNo) + Values[AI] = ActualArgs[ArgNo]; + + /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, + /// we can only evaluate any one basic block at most once. This set keeps + /// track of what we have executed so we can detect recursive cases etc. + std::set<BasicBlock*> ExecutedBlocks; + + // CurInst - The current instruction we're evaluating. + BasicBlock::iterator CurInst = F->begin()->begin(); + + // This is the main evaluation loop. 
+ while (1) { + Constant *InstResult = 0; + + if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { + if (SI->isVolatile()) return false; // no volatile accesses. + Constant *Ptr = getVal(Values, SI->getOperand(1)); + if (!isSimpleEnoughPointerToCommit(Ptr)) + // If this is too complex for us to commit, reject it. + return false; + Constant *Val = getVal(Values, SI->getOperand(0)); + MutatedMemory[Ptr] = Val; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { + InstResult = ConstantExpr::get(BO->getOpcode(), + getVal(Values, BO->getOperand(0)), + getVal(Values, BO->getOperand(1))); + } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { + InstResult = ConstantExpr::getCompare(CI->getPredicate(), + getVal(Values, CI->getOperand(0)), + getVal(Values, CI->getOperand(1))); + } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { + InstResult = ConstantExpr::getCast(CI->getOpcode(), + getVal(Values, CI->getOperand(0)), + CI->getType()); + } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + getVal(Values, SI->getOperand(1)), + getVal(Values, SI->getOperand(2))); + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { + Constant *P = getVal(Values, GEP->getOperand(0)); + SmallVector<Constant*, 8> GEPOps; + for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) + GEPOps.push_back(getVal(Values, GEP->getOperand(i))); + InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { + if (LI->isVolatile()) return false; // no volatile accesses. + InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), + MutatedMemory); + if (InstResult == 0) return false; // Could not evaluate load. + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { + if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 
+ const Type *Ty = AI->getType()->getElementType(); + AllocaTmps.push_back(new GlobalVariable(Ty, false, + GlobalValue::InternalLinkage, + UndefValue::get(Ty), + AI->getName())); + InstResult = AllocaTmps.back(); + } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { + // Cannot handle inline asm. + if (isa<InlineAsm>(CI->getOperand(0))) return false; + + // Resolve function pointers. + Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); + if (!Callee) return false; // Cannot resolve. + + std::vector<Constant*> Formals; + for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) + Formals.push_back(getVal(Values, CI->getOperand(i))); + + if (Callee->isDeclaration()) { + // If this is a function we can constant fold, do it. + if (Constant *C = ConstantFoldCall(Callee, &Formals[0], + Formals.size())) { + InstResult = C; + } else { + return false; + } + } else { + if (Callee->getFunctionType()->isVarArg()) + return false; + + Constant *RetVal; + + // Execute the call, if successful, use the return value. + if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, + MutatedMemory, AllocaTmps)) + return false; + InstResult = RetVal; + } + } else if (isa<TerminatorInst>(CurInst)) { + BasicBlock *NewBB = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { + if (BI->isUnconditional()) { + NewBB = BI->getSuccessor(0); + } else { + ConstantInt *Cond = + dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); + if (!Cond) return false; // Cannot determine. + + NewBB = BI->getSuccessor(!Cond->getZExtValue()); + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { + ConstantInt *Val = + dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); + if (!Val) return false; // Cannot determine. + NewBB = SI->getSuccessor(SI->findCaseValue(Val)); + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { + if (RI->getNumOperands()) + RetVal = getVal(Values, RI->getOperand(0)); + + CallStack.pop_back(); // return from fn. 
+ return true; // We succeeded at evaluating this ctor! + } else { + // invoke, unwind, unreachable. + return false; // Cannot handle this terminator. + } + + // Okay, we succeeded in evaluating this control flow. See if we have + // executed the new block before. If so, we have a looping function, + // which we cannot evaluate in reasonable time. + if (!ExecutedBlocks.insert(NewBB).second) + return false; // looped! + + // Okay, we have never been in this block before. Check to see if there + // are any PHI nodes. If so, evaluate them with information about where + // we came from. + BasicBlock *OldBB = CurInst->getParent(); + CurInst = NewBB->begin(); + PHINode *PN; + for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) + Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); + + // Do NOT increment CurInst. We know that the terminator had no value. + continue; + } else { + // Did not know how to evaluate this! + return false; + } + + if (!CurInst->use_empty()) + Values[CurInst] = InstResult; + + // Advance program counter. + ++CurInst; + } +} + +/// EvaluateStaticConstructor - Evaluate static constructors in the function, if +/// we can. Return true if we can, false otherwise. +static bool EvaluateStaticConstructor(Function *F) { + /// MutatedMemory - For each store we execute, we update this map. Loads + /// check this to get the most up-to-date value. If evaluation is successful, + /// this state is committed to the process. + std::map<Constant*, Constant*> MutatedMemory; + + /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable + /// to represent its body. This vector is needed so we can delete the + /// temporary globals when we are done. + std::vector<GlobalVariable*> AllocaTmps; + + /// CallStack - This is used to detect recursion. In pathological situations + /// we could hit exponential behavior, but at least there is nothing + /// unbounded. + std::vector<Function*> CallStack; + + // Call the function. 
+ Constant *RetValDummy; + bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(), + CallStack, MutatedMemory, AllocaTmps); + if (EvalSuccess) { + // We succeeded at evaluation: commit the result. + DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << MutatedMemory.size() + << " stores.\n"; + for (std::map<Constant*, Constant*>::iterator I = MutatedMemory.begin(), + E = MutatedMemory.end(); I != E; ++I) + CommitValueTo(I->second, I->first); + } + + // At this point, we are done interpreting. If we created any 'alloca' + // temporaries, release them now. + while (!AllocaTmps.empty()) { + GlobalVariable *Tmp = AllocaTmps.back(); + AllocaTmps.pop_back(); + + // If there are still users of the alloca, the program is doing something + // silly, e.g. storing the address of the alloca somewhere and using it + // later. Since this is undefined, we'll just make it be null. + if (!Tmp->use_empty()) + Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); + delete Tmp; + } + + return EvalSuccess; +} + + + +/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. +/// Return true if anything changed. +bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { + std::vector<Function*> Ctors = ParseGlobalCtors(GCL); + bool MadeChange = false; + if (Ctors.empty()) return false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (F == 0) { + if (i != Ctors.size()-1) { + Ctors.resize(i+1); + MadeChange = true; + } + break; + } + + // We cannot simplify external ctor functions. + if (F->empty()) continue; + + // If we can evaluate the ctor at compile time, do. 
+ if (EvaluateStaticConstructor(F)) { + Ctors.erase(Ctors.begin()+i); + MadeChange = true; + --i; + ++NumCtorsEvaluated; + continue; + } + } + + if (!MadeChange) return false; + + GCL = InstallGlobalCtors(GCL, Ctors); + return true; +} + + +bool GlobalOpt::runOnModule(Module &M) { + bool Changed = false; + + // Try to find the llvm.globalctors list. + GlobalVariable *GlobalCtors = FindGlobalCtors(M); + + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + // Delete functions that are trivially dead, ccc -> fastcc + LocalChange |= OptimizeFunctions(M); + + // Optimize global_ctors list. + if (GlobalCtors) + LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + + // Optimize non-address-taken globals. + LocalChange |= OptimizeGlobalVars(M); + Changed |= LocalChange; + } + + // TODO: Move all global ctors functions to the end of the module for code + // layout. + + return Changed; +} diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp new file mode 100644 index 0000000..b55e538 --- /dev/null +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -0,0 +1,197 @@ +//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements an _extremely_ simple interprocedural constant +// propagation pass. It could certainly be improved in many different ways, +// like using a worklist. This pass makes arguments dead, but does not remove +// them. The existing dead argument elimination pass should be run after this +// to clean up the mess. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ipconstprop" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumArgumentsProped, "Number of args turned into constants"); +STATISTIC(NumReturnValProped, "Number of return values turned into constants"); + +namespace { + /// IPCP - The interprocedural constant propagation pass + /// + struct VISIBILITY_HIDDEN IPCP : public ModulePass { + static char ID; // Pass identification, replacement for typeid + IPCP() : ModulePass((intptr_t)&ID) {} + + bool runOnModule(Module &M); + private: + bool PropagateConstantsIntoArguments(Function &F); + bool PropagateConstantReturn(Function &F); + }; + char IPCP::ID = 0; + RegisterPass<IPCP> X("ipconstprop", "Interprocedural constant propagation"); +} + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration()) { + // Delete any klingons. + I->removeDeadConstantUsers(); + if (I->hasInternalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(*I); + Changed |= PropagateConstantReturn(*I); + } + Changed |= LocalChange; + } + return Changed; +} + +/// PropagateConstantsIntoArguments - Look at all uses of the specified +/// function. If all uses are direct call sites, and all pass a particular +/// constant in for an argument, propagate that constant in as the argument. 
+/// +bool IPCP::PropagateConstantsIntoArguments(Function &F) { + if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. + + std::vector<std::pair<Constant*, bool> > ArgumentConstants; + ArgumentConstants.resize(F.arg_size()); + + unsigned NumNonconstant = 0; + + for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) + if (!isa<Instruction>(*I)) + return false; // Used by a non-instruction, do not transform + else { + CallSite CS = CallSite::get(cast<Instruction>(*I)); + if (CS.getInstruction() == 0 || + CS.getCalledFunction() != &F) + return false; // Not a direct call site? + + // Check out all of the potentially constant arguments + CallSite::arg_iterator AI = CS.arg_begin(); + Function::arg_iterator Arg = F.arg_begin(); + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; + ++i, ++AI, ++Arg) { + if (*AI == &F) return false; // Passes the function into itself + + if (!ArgumentConstants[i].second) { + if (Constant *C = dyn_cast<Constant>(*AI)) { + if (!ArgumentConstants[i].first) + ArgumentConstants[i].first = C; + else if (ArgumentConstants[i].first != C) { + // Became non-constant + ArgumentConstants[i].second = true; + ++NumNonconstant; + if (NumNonconstant == ArgumentConstants.size()) return false; + } + } else if (*AI != &*Arg) { // Ignore recursive calls with same arg + // This is not a constant argument. Mark the argument as + // non-constant. + ArgumentConstants[i].second = true; + ++NumNonconstant; + if (NumNonconstant == ArgumentConstants.size()) return false; + } + } + } + } + + // If we got to this point, there is a constant argument! + assert(NumNonconstant != ArgumentConstants.size()); + Function::arg_iterator AI = F.arg_begin(); + bool MadeChange = false; + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) + // Do we have a constant argument!? 
+ if (!ArgumentConstants[i].second && !AI->use_empty()) { + Value *V = ArgumentConstants[i].first; + if (V == 0) V = UndefValue::get(AI->getType()); + AI->replaceAllUsesWith(V); + ++NumArgumentsProped; + MadeChange = true; + } + return MadeChange; +} + + +// Check to see if this function returns a constant. If so, replace all callers +// that user the return value with the returned valued. If we can replace ALL +// callers, +bool IPCP::PropagateConstantReturn(Function &F) { + if (F.getReturnType() == Type::VoidTy) + return false; // No return value. + + // Check to see if this function returns a constant. + Value *RetVal = 0; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + if (isa<UndefValue>(RI->getOperand(0))) { + // Ignore. + } else if (Constant *C = dyn_cast<Constant>(RI->getOperand(0))) { + if (RetVal == 0) + RetVal = C; + else if (RetVal != C) + return false; // Does not return the same constant. + } else { + return false; // Does not return a constant. + } + + if (RetVal == 0) RetVal = UndefValue::get(F.getReturnType()); + + // If we got here, the function returns a constant value. Loop over all + // users, replacing any uses of the return value with the returned constant. + bool ReplacedAllUsers = true; + bool MadeChange = false; + for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) + if (!isa<Instruction>(*I)) + ReplacedAllUsers = false; + else { + CallSite CS = CallSite::get(cast<Instruction>(*I)); + if (CS.getInstruction() == 0 || + CS.getCalledFunction() != &F) { + ReplacedAllUsers = false; + } else { + if (!CS.getInstruction()->use_empty()) { + CS.getInstruction()->replaceAllUsesWith(RetVal); + MadeChange = true; + } + } + } + + // If we replace all users with the returned constant, and there can be no + // other callers of the function, replace the constant being returned in the + // function with an undef value. 
+ if (ReplacedAllUsers && F.hasInternalLinkage() && !isa<UndefValue>(RetVal)) { + Value *RV = UndefValue::get(RetVal->getType()); + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + if (RI->getOperand(0) != RV) { + RI->setOperand(0, RV); + MadeChange = true; + } + } + } + + if (MadeChange) ++NumReturnValProped; + return MadeChange; +} diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp new file mode 100644 index 0000000..6b06469 --- /dev/null +++ b/lib/Transforms/IPO/IndMemRemoval.cpp @@ -0,0 +1,89 @@ +//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass finds places where memory allocation functions may escape into +// indirect land. Some transforms are much easier (aka possible) only if free +// or malloc are not called indirectly. +// Thus find places where the address of memory functions are taken and construct +// bounce functions with direct calls of those functions. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "indmemrem" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumBounceSites, "Number of sites modified"); +STATISTIC(NumBounce , "Number of bounce functions created"); + +namespace { + class VISIBILITY_HIDDEN IndMemRemPass : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + IndMemRemPass() : ModulePass((intptr_t)&ID) {} + + virtual bool runOnModule(Module &M); + }; + char IndMemRemPass::ID = 0; + RegisterPass<IndMemRemPass> X("indmemrem","Indirect Malloc and Free Removal"); +} // end anonymous namespace + + +bool IndMemRemPass::runOnModule(Module &M) { + //in Theory, all direct calls of malloc and free should be promoted + //to intrinsics. Therefor, this goes through and finds where the + //address of free or malloc are taken and replaces those with bounce + //functions, ensuring that all malloc and free that might happen + //happen through intrinsics. 
+ bool changed = false; + if (Function* F = M.getFunction("free")) { + assert(F->isDeclaration() && "free not external?"); + if (!F->use_empty()) { + Function* FN = new Function(F->getFunctionType(), + GlobalValue::LinkOnceLinkage, + "free_llvm_bounce", &M); + BasicBlock* bb = new BasicBlock("entry",FN); + Instruction* R = new ReturnInst(bb); + new FreeInst(FN->arg_begin(), R); + ++NumBounce; + NumBounceSites += F->getNumUses(); + F->replaceAllUsesWith(FN); + changed = true; + } + } + if (Function* F = M.getFunction("malloc")) { + assert(F->isDeclaration() && "malloc not external?"); + if (!F->use_empty()) { + Function* FN = new Function(F->getFunctionType(), + GlobalValue::LinkOnceLinkage, + "malloc_llvm_bounce", &M); + BasicBlock* bb = new BasicBlock("entry",FN); + Instruction* c = CastInst::createIntegerCast( + FN->arg_begin(), Type::Int32Ty, false, "c", bb); + Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb); + new ReturnInst(a, bb); + ++NumBounce; + NumBounceSites += F->getNumUses(); + F->replaceAllUsesWith(FN); + changed = true; + } + } + return changed; +} + +ModulePass *llvm::createIndMemRemPass() { + return new IndMemRemPass(); +} diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp new file mode 100644 index 0000000..2157dcd --- /dev/null +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -0,0 +1,323 @@ +//===- InlineSimple.cpp - Code to perform simple function inlining --------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bottom-up inlining of functions into callees. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline" +#include "llvm/CallingConv.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include <set> + +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN ArgInfo { + unsigned ConstantWeight; + unsigned AllocaWeight; + + ArgInfo(unsigned CWeight, unsigned AWeight) + : ConstantWeight(CWeight), AllocaWeight(AWeight) {} + }; + + // FunctionInfo - For each function, calculate the size of it in blocks and + // instructions. + struct VISIBILITY_HIDDEN FunctionInfo { + // NumInsts, NumBlocks - Keep track of how large each function is, which is + // used to estimate the code size cost of inlining it. + unsigned NumInsts, NumBlocks; + + // ArgumentWeights - Each formal argument of the function is inspected to + // see if it is used in any contexts where making it a constant or alloca + // would reduce the code size. If so, we add some value to the argument + // entry here. + std::vector<ArgInfo> ArgumentWeights; + + FunctionInfo() : NumInsts(0), NumBlocks(0) {} + + /// analyzeFunction - Fill in the current structure with information gleaned + /// from the specified function. 
+ void analyzeFunction(Function *F); + }; + + class VISIBILITY_HIDDEN SimpleInliner : public Inliner { + std::map<const Function*, FunctionInfo> CachedFunctionInfo; + std::set<const Function*> NeverInline; // Functions that are never inlined + public: + SimpleInliner() : Inliner(&ID) {} + static char ID; // Pass identification, replacement for typeid + int getInlineCost(CallSite CS); + virtual bool doInitialization(CallGraph &CG); + }; + char SimpleInliner::ID = 0; + RegisterPass<SimpleInliner> X("inline", "Function Integration/Inlining"); +} + +Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +static unsigned CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (isa<BranchInst>(*UI)) + Reduction += 40; // Eliminating a conditional branch is a big win + else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI)) + // Eliminating a switch is a big win, proportional to the number of edges + // deleted. + Reduction += (SI->getNumSuccessors()-1) * 40; + else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += CI->getCalledValue() == V ? 500 : 0; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += II->getCalledValue() == V ? 500 : 0; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. 
+ Instruction &Inst = cast<Instruction>(**UI); + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += 7; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +static unsigned CountCodeReductionForAlloca(Value *V) { + if (!isa<PointerType>(V->getType())) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast<Instruction>(*UI); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += 10; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + for (Instruction::op_iterator I = GEP->op_begin()+1, E = GEP->op_end(); + I != E; ++I) + if (!isa<Constant>(*I)) return 0; + Reduction += CountCodeReductionForAlloca(GEP)+15; + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void FunctionInfo::analyzeFunction(Function *F) { + unsigned NumInsts = 0, NumBlocks = 0; + + // Look at the size of the callee. Each basic block counts as 20 units, and + // each instruction counts as 10. 
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count. + + // Noop casts, including ptr <-> int, don't count. + if (const CastInst *CI = dyn_cast<CastInst>(II)) { + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + isa<PtrToIntInst>(CI)) + continue; + } else if (const GetElementPtrInst *GEPI = + dyn_cast<GetElementPtrInst>(II)) { + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + bool AllConstant = true; + for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i) + if (!isa<ConstantInt>(GEPI->getOperand(i))) { + AllConstant = false; + break; + } + if (AllConstant) continue; + } + + ++NumInsts; + } + + ++NumBlocks; + } + + this->NumBlocks = NumBlocks; + this->NumInsts = NumInsts; + + // Check out all of the arguments to the function, figuring out how much + // code can be eliminated if one of the arguments is a constant. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I), + CountCodeReductionForAlloca(I))); +} + + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. +// +int SimpleInliner::getInlineCost(CallSite CS) { + Instruction *TheCall = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + const Function *Caller = TheCall->getParent()->getParent(); + + // Don't inline a directly recursive call. + if (Caller == Callee || + // Don't inline functions which can be redefined at link-time to mean + // something else. link-once linkage is ok though. + Callee->hasWeakLinkage() || + + // Don't inline functions marked noinline. 
+ NeverInline.count(Callee)) + return 2000000000; + + // InlineCost - This value measures how good of an inline candidate this call + // site is to inline. A lower inline cost make is more likely for the call to + // be inlined. This value may go negative. + // + int InlineCost = 0; + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. + // + if (Callee->hasInternalLinkage() && Callee->hasOneUse()) + InlineCost -= 30000; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + InlineCost += 2000; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) + InlineCost += 10000; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) + InlineCost += 10000; + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I, ++ArgNo) { + // Each argument passed in has a cost at both the caller and the callee + // sides. This favors functions that take many arguments over functions + // that take few arguments. 
+ InlineCost -= 20; + + // If this is a function being passed in, it is very likely that we will be + // able to turn an indirect function call into a direct function call. + if (isa<Function>(I)) + InlineCost -= 100; + + // If an alloca is passed in, inlining this function is likely to allow + // significant future optimization possibilities (like scalar promotion, and + // scalarization), so encourage the inlining of the function. + // + else if (isa<AllocaInst>(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight; + + // If this is a constant being passed into the function, use the argument + // weights calculated for the callee to determine how much will be folded + // away with this information. + } else if (isa<Constant>(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight; + } + } + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Don't inline into something too big, which would make it bigger. Here, we + // count each basic block as a single unit. + // + InlineCost += Caller->size()/20; + + + // Look at the size of the callee. Each basic block counts as 20 units, and + // each instruction counts as 5. + InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20; + return InlineCost; +} + +// doInitialization - Initializes the vector of functions that have been +// annotated with the noinline attribute. 
+bool SimpleInliner::doInitialization(CallGraph &CG) { + + Module &M = CG.getModule(); + + // Get llvm.noinline + GlobalVariable *GV = M.getNamedGlobal("llvm.noinline"); + + if (GV == 0) + return false; + + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + + if (InitList == 0) + return false; + + // Iterate over each element and add to the NeverInline set + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + + // Get Source + const Constant *Elt = InitList->getOperand(i); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) + if (CE->getOpcode() == Instruction::BitCast) + Elt = CE->getOperand(0); + + // Insert into set of functions to never inline + if (const Function *F = dyn_cast<Function>(Elt)) + NeverInline.insert(F); + } + + return false; +} diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp new file mode 100644 index 0000000..85893d7 --- /dev/null +++ b/lib/Transforms/IPO/Inliner.cpp @@ -0,0 +1,217 @@ +//===- Inliner.cpp - Code common to all inliners --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the mechanics required to implement inlining without +// missing any calls and updating the call graph. The decisions of which calls +// are profitable to inline are implemented elsewhere. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include <set> +using namespace llvm; + +STATISTIC(NumInlined, "Number of functions inlined"); +STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); + +namespace { + cl::opt<unsigned> // FIXME: 200 is VERY conservative + InlineLimit("inline-threshold", cl::Hidden, cl::init(200), + cl::desc("Control the amount of inlining to perform (default = 200)")); +} + +Inliner::Inliner(const void *ID) + : CallGraphSCCPass((intptr_t)ID), InlineThreshold(InlineLimit) {} + +/// getAnalysisUsage - For this class, we declare that we require and preserve +/// the call graph. If the derived class implements this method, it should +/// always explicitly call the implementation here. +void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired<TargetData>(); + CallGraphSCCPass::getAnalysisUsage(Info); +} + +// InlineCallIfPossible - If it is possible to inline the specified call site, +// do so and update the CallGraph for this operation. +static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, + const std::set<Function*> &SCCFunctions, + const TargetData &TD) { + Function *Callee = CS.getCalledFunction(); + if (!InlineFunction(CS, &CG, &TD)) return false; + + // If we inlined the last possible call site to the function, delete the + // function body now. 
+ if (Callee->use_empty() && Callee->hasInternalLinkage() && + !SCCFunctions.count(Callee)) { + DOUT << " -> Deleting dead function: " << Callee->getName() << "\n"; + + // Remove any call graph edges from the callee to its callees. + CallGraphNode *CalleeNode = CG[Callee]; + while (CalleeNode->begin() != CalleeNode->end()) + CalleeNode->removeCallEdgeTo((CalleeNode->end()-1)->second); + + // Removing the node for callee from the call graph and delete it. + delete CG.removeFunctionFromModule(CalleeNode); + ++NumDeleted; + } + return true; +} + +bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { + CallGraph &CG = getAnalysis<CallGraph>(); + + std::set<Function*> SCCFunctions; + DOUT << "Inliner visiting SCC:"; + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (F) SCCFunctions.insert(F); + DOUT << " " << (F ? F->getName() : "INDIRECTNODE"); + } + + // Scan through and identify all call sites ahead of time so that we only + // inline call sites in the original functions, not call sites that result + // from inlining other functions. + std::vector<CallSite> CallSites; + + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + if (Function *F = SCC[i]->getFunction()) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + CallSite CS = CallSite::get(I); + if (CS.getInstruction() && (!CS.getCalledFunction() || + !CS.getCalledFunction()->isDeclaration())) + CallSites.push_back(CS); + } + + DOUT << ": " << CallSites.size() << " call sites.\n"; + + // Now that we have all of the call sites, move the ones to functions in the + // current SCC to the end of the list. 
+ unsigned FirstCallInSCC = CallSites.size(); + for (unsigned i = 0; i < FirstCallInSCC; ++i) + if (Function *F = CallSites[i].getCalledFunction()) + if (SCCFunctions.count(F)) + std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); + + // Now that we have all of the call sites, loop over them and inline them if + // it looks profitable to do so. + bool Changed = false; + bool LocalChange; + do { + LocalChange = false; + // Iterate over the outer loop because inlining functions can cause indirect + // calls to become direct calls. + for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) + if (Function *Callee = CallSites[CSi].getCalledFunction()) { + // Calls to external functions are never inlinable. + if (Callee->isDeclaration() || + CallSites[CSi].getInstruction()->getParent()->getParent() ==Callee){ + if (SCC.size() == 1) { + std::swap(CallSites[CSi], CallSites.back()); + CallSites.pop_back(); + } else { + // Keep the 'in SCC / not in SCC' boundary correct. + CallSites.erase(CallSites.begin()+CSi); + } + --CSi; + continue; + } + + // If the policy determines that we should inline this function, + // try to do so. + CallSite CS = CallSites[CSi]; + int InlineCost = getInlineCost(CS); + if (InlineCost >= (int)InlineThreshold) { + DOUT << " NOT Inlining: cost=" << InlineCost + << ", Call: " << *CS.getInstruction(); + } else { + DOUT << " Inlining: cost=" << InlineCost + << ", Call: " << *CS.getInstruction(); + + // Attempt to inline the function... + if (InlineCallIfPossible(CS, CG, SCCFunctions, + getAnalysis<TargetData>())) { + // Remove this call site from the list. If possible, use + // swap/pop_back for efficiency, but do not use it if doing so would + // move a call site to a function in this SCC before the + // 'FirstCallInSCC' barrier. 
+ if (SCC.size() == 1) { + std::swap(CallSites[CSi], CallSites.back()); + CallSites.pop_back(); + } else { + CallSites.erase(CallSites.begin()+CSi); + } + --CSi; + + ++NumInlined; + Changed = true; + LocalChange = true; + } + } + } + } while (LocalChange); + + return Changed; +} + +// doFinalization - Remove now-dead linkonce functions at the end of +// processing to avoid breaking the SCC traversal. +bool Inliner::doFinalization(CallGraph &CG) { + std::set<CallGraphNode*> FunctionsToRemove; + + // Scan for all of the functions, looking for ones that should now be removed + // from the program. Insert the dead ones in the FunctionsToRemove set. + for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { + CallGraphNode *CGN = I->second; + if (Function *F = CGN ? CGN->getFunction() : 0) { + // If the only remaining users of the function are dead constants, remove + // them. + F->removeDeadConstantUsers(); + + if ((F->hasLinkOnceLinkage() || F->hasInternalLinkage()) && + F->use_empty()) { + + // Remove any call graph edges from the function to its callees. + while (CGN->begin() != CGN->end()) + CGN->removeCallEdgeTo((CGN->end()-1)->second); + + // Remove any edges from the external node to the function's call graph + // node. These edges might have been made irrelevant due to + // optimization of the program. + CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); + + // Remove the node for the callee from the call graph and delete it. + FunctionsToRemove.insert(CGN); + } + } + } + + // Now that we know which functions to delete, do so. We didn't want to do + // this inline, because that would invalidate our CallGraph::iterator + // objects. 
:( + bool Changed = false; + for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(), + E = FunctionsToRemove.end(); I != E; ++I) { + delete CG.removeFunctionFromModule(*I); + ++NumDeleted; + Changed = true; + } + + return Changed; +} diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp new file mode 100644 index 0000000..7b5392c --- /dev/null +++ b/lib/Transforms/IPO/Internalize.cpp @@ -0,0 +1,154 @@ +//===-- Internalize.cpp - Mark functions internal -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for a +// main function. If a main function is found, all other functions and all +// global variables with initializers are marked as internal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "internalize" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include <fstream> +#include <set> +using namespace llvm; + +STATISTIC(NumFunctions, "Number of functions internalized"); +STATISTIC(NumGlobals , "Number of global vars internalized"); + +namespace { + + // APIFile - A file which contains a list of symbols that should not be marked + // external. + cl::opt<std::string> + APIFile("internalize-public-api-file", cl::value_desc("filename"), + cl::desc("A file containing list of symbol names to preserve")); + + // APIList - A list of symbols that should not be marked internal. 
+ cl::list<std::string> + APIList("internalize-public-api-list", cl::value_desc("list"), + cl::desc("A list of symbol names to preserve"), + cl::CommaSeparated); + + class VISIBILITY_HIDDEN InternalizePass : public ModulePass { + std::set<std::string> ExternalNames; + bool DontInternalize; + public: + static char ID; // Pass identification, replacement for typeid + InternalizePass(bool InternalizeEverything = true); + InternalizePass(const std::vector <const char *>& exportList); + void LoadFile(const char *Filename); + virtual bool runOnModule(Module &M); + }; + char InternalizePass::ID = 0; + RegisterPass<InternalizePass> X("internalize", "Internalize Global Symbols"); +} // end anonymous namespace + +InternalizePass::InternalizePass(bool InternalizeEverything) + : ModulePass((intptr_t)&ID), DontInternalize(false){ + if (!APIFile.empty()) // If a filename is specified, use it + LoadFile(APIFile.c_str()); + else if (!APIList.empty()) // Else, if a list is specified, use it. + ExternalNames.insert(APIList.begin(), APIList.end()); + else if (!InternalizeEverything) + // Finally, if we're allowed to, internalize all but main. + DontInternalize = true; +} + +InternalizePass::InternalizePass(const std::vector<const char *>&exportList) + : ModulePass((intptr_t)&ID), DontInternalize(false){ + for(std::vector<const char *>::const_iterator itr = exportList.begin(); + itr != exportList.end(); itr++) { + ExternalNames.insert(*itr); + } +} + +void InternalizePass::LoadFile(const char *Filename) { + // Load the APIFile... + std::ifstream In(Filename); + if (!In.good()) { + cerr << "WARNING: Internalize couldn't load file '" << Filename << "'!\n"; + return; // Do not internalize anything... 
+ } + while (In) { + std::string Symbol; + In >> Symbol; + if (!Symbol.empty()) + ExternalNames.insert(Symbol); + } +} + +bool InternalizePass::runOnModule(Module &M) { + if (DontInternalize) return false; + + // If no list or file of symbols was specified, check to see if there is a + // "main" symbol defined in the module. If so, use it, otherwise do not + // internalize the module, it must be a library or something. + // + if (ExternalNames.empty()) { + Function *MainFunc = M.getFunction("main"); + if (MainFunc == 0 || MainFunc->isDeclaration()) + return false; // No main found, must be a library... + + // Preserve main, internalize all else. + ExternalNames.insert(MainFunc->getName()); + } + + bool Changed = false; + + // Found a main function, mark all functions not named main as internal. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && // Function must be defined here + !I->hasInternalLinkage() && // Can't already have internal linkage + !ExternalNames.count(I->getName())) {// Not marked to keep external? + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumFunctions; + DOUT << "Internalizing func " << I->getName() << "\n"; + } + + // Never internalize the llvm.used symbol. It is used to implement + // attribute((used)). + ExternalNames.insert("llvm.used"); + + // Never internalize anchors used by the machine module info, else the info + // won't find them. (see MachineModuleInfo.) + ExternalNames.insert("llvm.dbg.compile_units"); + ExternalNames.insert("llvm.dbg.global_variables"); + ExternalNames.insert("llvm.dbg.subprograms"); + ExternalNames.insert("llvm.global_ctors"); + ExternalNames.insert("llvm.global_dtors"); + + // Mark all global variables with initializers as internal as well. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasInternalLinkage() && + !ExternalNames.count(I->getName())) { + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumGlobals; + DOUT << "Internalized gvar " << I->getName() << "\n"; + } + + return Changed; +} + +ModulePass *llvm::createInternalizePass(bool InternalizeEverything) { + return new InternalizePass(InternalizeEverything); +} + +ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) { + return new InternalizePass(el); +} diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp new file mode 100644 index 0000000..7b14ce0 --- /dev/null +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -0,0 +1,201 @@ +//===- LoopExtractor.cpp - Extract each loop into a new function ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A pass wrapper around the ExtractLoop() scalar transformation to extract each +// top-level loop into its own new function. If the loop is the ONLY loop in a +// given function, it is not touched. This is a pass most useful for debugging +// via bugpoint. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-extract" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumExtracted, "Number of loops extracted"); + +namespace { + // FIXME: This is not a function pass, but the PassManager doesn't allow + // Module passes to require FunctionPasses, so we can't get loop info if we're + // not a function pass. + struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + unsigned NumLoops; + + LoopExtractor(unsigned numLoops = ~0) + : FunctionPass((intptr_t)&ID), NumLoops(numLoops) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(BreakCriticalEdgesID); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + } + }; + + char LoopExtractor::ID = 0; + RegisterPass<LoopExtractor> + X("loop-extract", "Extract loops into new functions"); + + /// SingleLoopExtractor - For bugpoint. + struct SingleLoopExtractor : public LoopExtractor { + static char ID; // Pass identification, replacement for typeid + SingleLoopExtractor() : LoopExtractor(1) {} + }; + + char SingleLoopExtractor::ID = 0; + RegisterPass<SingleLoopExtractor> + Y("loop-extract-single", "Extract at most one loop into a new function"); +} // End anonymous namespace + +// createLoopExtractorPass - This pass extracts all natural loops from the +// program into a function if it can. 
+// +FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } + +bool LoopExtractor::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis<LoopInfo>(); + + // If this function has no loops, there is nothing to do. + if (LI.begin() == LI.end()) + return false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + + // If there is more than one top-level loop in this function, extract all of + // the loops. + bool Changed = false; + if (LI.end()-LI.begin() > 1) { + for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, *i) != 0; + ++NumExtracted; + } + } else { + // Otherwise there is exactly one top-level loop. If this function is more + // than a minimal wrapper around the loop, extract the loop. + Loop *TLL = *LI.begin(); + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = F.getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != TLL->getHeader()) + ShouldExtractLoop = true; + else { + // Check to see if any exits from the loop are more than just return + // blocks. + std::vector<BasicBlock*> ExitBlocks; + TLL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { + ShouldExtractLoop = true; + break; + } + } + + if (ShouldExtractLoop) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, TLL) != 0; + ++NumExtracted; + } else { + // Okay, this function is a minimal container around the specified loop. + // If we extract the loop, we will continue to just keep extracting it + // infinitely... so don't extract it. However, if the loop contains any + // subloops, extract them. 
+ for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) { + if (NumLoops == 0) return Changed; + --NumLoops; + Changed |= ExtractLoop(DT, *i) != 0; + ++NumExtracted; + } + } + } + + return Changed; +} + +// createSingleLoopExtractorPass - This pass extracts one natural loop from the +// program into a function if it can. This is used by bugpoint. +// +FunctionPass *llvm::createSingleLoopExtractorPass() { + return new SingleLoopExtractor(); +} + + +namespace { + /// BlockExtractorPass - This pass is used by bugpoint to extract all blocks + /// from the module into their own functions except for those specified by the + /// BlocksToNotExtract list. + class BlockExtractorPass : public ModulePass { + std::vector<BasicBlock*> BlocksToNotExtract; + public: + static char ID; // Pass identification, replacement for typeid + BlockExtractorPass(std::vector<BasicBlock*> &B) + : ModulePass((intptr_t)&ID), BlocksToNotExtract(B) {} + BlockExtractorPass() : ModulePass((intptr_t)&ID) {} + + bool runOnModule(Module &M); + }; + + char BlockExtractorPass::ID = 0; + RegisterPass<BlockExtractorPass> + XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)"); +} + +// createBlockExtractorPass - This pass extracts all blocks (except those +// specified in the argument list) from the functions in the module. +// +ModulePass *llvm::createBlockExtractorPass(std::vector<BasicBlock*> &BTNE) { + return new BlockExtractorPass(BTNE); +} + +bool BlockExtractorPass::runOnModule(Module &M) { + std::set<BasicBlock*> TranslatedBlocksToNotExtract; + for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { + BasicBlock *BB = BlocksToNotExtract[i]; + Function *F = BB->getParent(); + + // Map the corresponding function in this module. + Function *MF = M.getFunction(F->getName()); + assert(MF->getFunctionType() == F->getFunctionType() && "Wrong function?"); + + // Figure out which index the basic block is in its function. 
+ Function::iterator BBI = MF->begin(); + std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); + TranslatedBlocksToNotExtract.insert(BBI); + } + + // Now that we know which blocks to not extract, figure out which ones we WANT + // to extract. + std::vector<BasicBlock*> BlocksToExtract; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (!TranslatedBlocksToNotExtract.count(BB)) + BlocksToExtract.push_back(BB); + + for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) + ExtractBasicBlock(BlocksToExtract[i]); + + return !BlocksToExtract.empty(); +} diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp new file mode 100644 index 0000000..0243980 --- /dev/null +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -0,0 +1,534 @@ +//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering of setjmp and longjmp to use the +// LLVM invoke and unwind instructions as necessary. +// +// Lowering of longjmp is fairly trivial. We replace the call with a +// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()". +// This unwinds the stack for us calling all of the destructors for +// objects allocated on the stack. +// +// At a setjmp call, the basic block is split and the setjmp removed. +// The calls in a function that have a setjmp are converted to invoke +// where the except part checks to see if it's a longjmp exception and, +// if so, if it's handled in the function. If it is, then it gets the +// value returned by the longjmp and goes to where the basic block was +// split. 
Invoke instructions are handled in a similar fashion with the +// original except block being executed if it isn't a longjmp except +// that is handled by that function. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FIXME: This pass doesn't deal with PHI statements just yet. That is, +// we expect this to occur before SSAification is done. This would seem +// to make sense, but in general, it might be a good idea to make this +// pass invokable via the "opt" command at will. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersetjmp" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/VectorExtras.h" +using namespace llvm; + +STATISTIC(LongJmpsTransformed, "Number of longjmps transformed"); +STATISTIC(SetJmpsTransformed , "Number of setjmps transformed"); +STATISTIC(CallsTransformed , "Number of calls invokified"); +STATISTIC(InvokesTransformed , "Number of invokes modified"); + +namespace { + //===--------------------------------------------------------------------===// + // LowerSetJmp pass implementation. + class VISIBILITY_HIDDEN LowerSetJmp : public ModulePass, + public InstVisitor<LowerSetJmp> { + // LLVM library functions... 
+ Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap + Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap + Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map + Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp + Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception + Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception + Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value + + typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair; + + // Keep track of those basic blocks reachable via a depth-first search of + // the CFG from a setjmp call. We only need to transform those "call" and + // "invoke" instructions that are reachable from the setjmp call site. + std::set<BasicBlock*> DFSBlocks; + + // The setjmp map is going to hold information about which setjmps + // were called (each setjmp gets its own number) and with which + // buffer it was called. + std::map<Function*, AllocaInst*> SJMap; + + // The rethrow basic block map holds the basic block to branch to if + // the exception isn't handled in the current function and needs to + // be rethrown. + std::map<const Function*, BasicBlock*> RethrowBBMap; + + // The preliminary basic block map holds a basic block that grabs the + // exception and determines if it's handled by the current function. + std::map<const Function*, BasicBlock*> PrelimBBMap; + + // The switch/value map holds a switch inst/call inst pair. The + // switch inst controls which handler (if any) gets called and the + // value is the value returned to that handler by the call to + // __llvm_sjljeh_get_longjmp_value. + std::map<const Function*, SwitchValuePair> SwitchValMap; + + // A map of which setjmps we've seen so far in a function. 
+ std::map<const Function*, unsigned> SetJmpIDMap; + + AllocaInst* GetSetJmpMap(Function* Func); + BasicBlock* GetRethrowBB(Function* Func); + SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow); + + void TransformLongJmpCall(CallInst* Inst); + void TransformSetJmpCall(CallInst* Inst); + + bool IsTransformableFunction(const std::string& Name); + public: + static char ID; // Pass identification, replacement for typeid + LowerSetJmp() : ModulePass((intptr_t)&ID) {} + + void visitCallInst(CallInst& CI); + void visitInvokeInst(InvokeInst& II); + void visitReturnInst(ReturnInst& RI); + void visitUnwindInst(UnwindInst& UI); + + bool runOnModule(Module& M); + bool doInitialization(Module& M); + }; + + char LowerSetJmp::ID = 0; + RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump"); +} // end anonymous namespace + +// run - Run the transformation on the program. We grab the function +// prototypes for longjmp and setjmp. If they are used in the program, +// then we can go directly to the places they're at and transform them. +bool LowerSetJmp::runOnModule(Module& M) { + bool Changed = false; + + // These are what the functions are called. + Function* SetJmp = M.getFunction("llvm.setjmp"); + Function* LongJmp = M.getFunction("llvm.longjmp"); + + // This program doesn't have longjmp and setjmp calls. + if ((!LongJmp || LongJmp->use_empty()) && + (!SetJmp || SetJmp->use_empty())) return false; + + // Initialize some values and functions we'll need to transform the + // setjmp/longjmp functions. 
+ doInitialization(M); + + if (SetJmp) { + for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); + B != E; ++B) { + BasicBlock* BB = cast<Instruction>(*B)->getParent(); + for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), + E = df_ext_end(BB, DFSBlocks); I != E; ++I) + /* empty */; + } + + while (!SetJmp->use_empty()) { + assert(isa<CallInst>(SetJmp->use_back()) && + "User of setjmp intrinsic not a call?"); + TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); + Changed = true; + } + } + + if (LongJmp) + while (!LongJmp->use_empty()) { + assert(isa<CallInst>(LongJmp->use_back()) && + "User of longjmp intrinsic not a call?"); + TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); + Changed = true; + } + + // Now go through the affected functions and convert calls and invokes + // to new invokes... + for (std::map<Function*, AllocaInst*>::iterator + B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { + Function* F = B->first; + for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) + for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { + visit(*IB++); + if (IB != BB->end() && IB->getParent() != BB) + break; // The next instruction got moved to a different block! + } + } + + DFSBlocks.clear(); + SJMap.clear(); + RethrowBBMap.clear(); + PrelimBBMap.clear(); + SwitchValMap.clear(); + SetJmpIDMap.clear(); + + return Changed; +} + +// doInitialization - For the lower long/setjmp pass, this ensures that a +// module contains a declaration for the intrinsic functions we are going +// to call to convert longjmp and setjmp calls. +// +// This function is always successful, unless it isn't. +bool LowerSetJmp::doInitialization(Module& M) +{ + const Type *SBPTy = PointerType::get(Type::Int8Ty); + const Type *SBPPTy = PointerType::get(SBPTy); + + // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for + // a description of the following library functions. 
+ + // void __llvm_sjljeh_init_setjmpmap(void**) + InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", + Type::VoidTy, SBPPTy, (Type *)0); + // void __llvm_sjljeh_destroy_setjmpmap(void**) + DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", + Type::VoidTy, SBPPTy, (Type *)0); + + // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) + AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", + Type::VoidTy, SBPPTy, SBPTy, + Type::Int32Ty, (Type *)0); + + // void __llvm_sjljeh_throw_longjmp(int*, int) + ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", + Type::VoidTy, SBPTy, Type::Int32Ty, + (Type *)0); + + // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) + TryCatchLJ = + M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", + Type::Int32Ty, SBPPTy, (Type *)0); + + // bool __llvm_sjljeh_is_longjmp_exception() + IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", + Type::Int1Ty, (Type *)0); + + // int __llvm_sjljeh_get_longjmp_value() + GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", + Type::Int32Ty, (Type *)0); + return true; +} + +// IsTransformableFunction - Return true if the function name isn't one +// of the ones we don't want transformed. Currently, don't transform any +// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error +// handling functions (beginning with __llvm_sjljeh_...they don't throw +// exceptions). +bool LowerSetJmp::IsTransformableFunction(const std::string& Name) { + std::string SJLJEh("__llvm_sjljeh"); + + if (Name.size() > SJLJEh.size()) + return std::string(Name.begin(), Name.begin() + SJLJEh.size()) != SJLJEh; + + return true; +} + +// TransformLongJmpCall - Transform a longjmp call into a call to the +// internal __llvm_sjljeh_throw_longjmp function. It then takes care of +// throwing the exception for us. 
+void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) +{ + const Type* SBPTy = PointerType::get(Type::Int8Ty); + + // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the + // same parameters as "longjmp", except that the buffer is cast to a + // char*. It returns "void", so it doesn't need to replace any of + // Inst's uses and doesn't get a name. + CastInst* CI = + new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); + new CallInst(ThrowLongJmp, CI, Inst->getOperand(2), "", Inst); + + SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; + + // If the function has a setjmp call in it (they are transformed first) + // we should branch to the basic block that determines if this longjmp + // is applicable here. Otherwise, issue an unwind. + if (SVP.first) + new BranchInst(SVP.first->getParent(), Inst); + else + new UnwindInst(Inst); + + // Remove all insts after the branch/unwind inst. Go from back to front to + // avoid replaceAllUsesWith if possible. + BasicBlock *BB = Inst->getParent(); + Instruction *Removed; + do { + Removed = &BB->back(); + // If the removed instructions have any users, replace them now. + if (!Removed->use_empty()) + Removed->replaceAllUsesWith(UndefValue::get(Removed->getType())); + Removed->eraseFromParent(); + } while (Removed != Inst); + + ++LongJmpsTransformed; +} + +// GetSetJmpMap - Retrieve (create and initialize, if necessary) the +// setjmp map. This map is going to hold information about which setjmps +// were called (each setjmp gets its own number) and with which buffer it +// was called. There can be only one! +AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) +{ + if (SJMap[Func]) return SJMap[Func]; + + // Insert the setjmp map initialization before the first instruction in + // the function. + Instruction* Inst = Func->getEntryBlock().begin(); + assert(Inst && "Couldn't find even ONE instruction in entry block!"); + + // Fill in the alloca and call to initialize the SJ map. 
+ const Type *SBPTy = PointerType::get(Type::Int8Ty); + AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); + new CallInst(InitSJMap, Map, "", Inst); + return SJMap[Func] = Map; +} + +// GetRethrowBB - Only one rethrow basic block is needed per function. +// If this is a longjmp exception but not handled in this block, this BB +// performs the rethrow. +BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) +{ + if (RethrowBBMap[Func]) return RethrowBBMap[Func]; + + // The basic block we're going to jump to if we need to rethrow the + // exception. + BasicBlock* Rethrow = new BasicBlock("RethrowExcept", Func); + + // Fill in the "Rethrow" BB with a call to rethrow the exception. This + // is the last instruction in the BB since at this point the runtime + // should exit this function and go to the next function. + new UnwindInst(Rethrow); + return RethrowBBMap[Func] = Rethrow; +} + +// GetSJSwitch - Return the switch statement that controls which handler +// (if any) gets called and the value returned to that handler. +LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, + BasicBlock* Rethrow) +{ + if (SwitchValMap[Func].first) return SwitchValMap[Func]; + + BasicBlock* LongJmpPre = new BasicBlock("LongJmpBlkPre", Func); + BasicBlock::InstListType& LongJmpPreIL = LongJmpPre->getInstList(); + + // Keep track of the preliminary basic block for some of the other + // transformations. + PrelimBBMap[Func] = LongJmpPre; + + // Grab the exception. + CallInst* Cond = new CallInst(IsLJException, "IsLJExcept"); + LongJmpPreIL.push_back(Cond); + + // The "decision basic block" gets the number associated with the + // setjmp call returning to switch on and the value returned by + // longjmp. + BasicBlock* DecisionBB = new BasicBlock("LJDecisionBB", Func); + BasicBlock::InstListType& DecisionBBIL = DecisionBB->getInstList(); + + new BranchInst(DecisionBB, Rethrow, Cond, LongJmpPre); + + // Fill in the "decision" basic block. 
+ CallInst* LJVal = new CallInst(GetLJValue, "LJVal"); + DecisionBBIL.push_back(LJVal); + CallInst* SJNum = new CallInst(TryCatchLJ, GetSetJmpMap(Func), "SJNum"); + DecisionBBIL.push_back(SJNum); + + SwitchInst* SI = new SwitchInst(SJNum, Rethrow, 0, DecisionBB); + return SwitchValMap[Func] = SwitchValuePair(SI, LJVal); +} + +// TransformSetJmpCall - The setjmp call is a bit trickier to transform. +// We're going to convert all setjmp calls to nops. Then all "call" and +// "invoke" instructions in the function are converted to "invoke" where +// the "except" branch is used when returning from a longjmp call. +void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) +{ + BasicBlock* ABlock = Inst->getParent(); + Function* Func = ABlock->getParent(); + + // Add this setjmp to the setjmp map. + const Type* SBPTy = PointerType::get(Type::Int8Ty); + CastInst* BufPtr = + new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + std::vector<Value*> Args = + make_vector<Value*>(GetSetJmpMap(Func), BufPtr, + ConstantInt::get(Type::Int32Ty, + SetJmpIDMap[Func]++), 0); + new CallInst(AddSJToMap, &Args[0], Args.size(), "", Inst); + + // We are guaranteed that there are no values live across basic blocks + // (because we are "not in SSA form" yet), but there can still be values live + // in basic blocks. Because of this, splitting the setjmp block can cause + // values above the setjmp to not dominate uses which are after the setjmp + // call. For all of these occasions, we must spill the value to the stack. + // + std::set<Instruction*> InstrsAfterCall; + + // The call is probably very close to the end of the basic block, for the + // common usage pattern of: 'if (setjmp(...))', so keep track of the + // instructions after the call. 
+ for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end(); + I != E; ++I) + InstrsAfterCall.insert(I); + + for (BasicBlock::iterator II = ABlock->begin(); + II != BasicBlock::iterator(Inst); ++II) + // Loop over all of the uses of instruction. If any of them are after the + // call, "spill" the value to the stack. + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != ABlock || + InstrsAfterCall.count(cast<Instruction>(*UI))) { + DemoteRegToStack(*II); + break; + } + InstrsAfterCall.clear(); + + // Change the setjmp call into a branch statement. We'll remove the + // setjmp call in a little bit. No worries. + BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst); + assert(SetJmpContBlock && "Couldn't split setjmp BB!!"); + + SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont"); + + // Add the SetJmpContBlock to the set of blocks reachable from a setjmp. + DFSBlocks.insert(SetJmpContBlock); + + // This PHI node will be in the new block created from the + // splitBasicBlock call. + PHINode* PHI = new PHINode(Type::Int32Ty, "SetJmpReturn", Inst); + + // Coming from a call to setjmp, the return is 0. + PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock); + + // Add the case for this setjmp's number... + SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); + SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1), + SetJmpContBlock); + + // Value coming from the handling of the exception. + PHI->addIncoming(SVP.second, SVP.second->getParent()); + + // Replace all uses of this instruction with the PHI node created by + // the eradication of setjmp. + Inst->replaceAllUsesWith(PHI); + Inst->getParent()->getInstList().erase(Inst); + + ++SetJmpsTransformed; +} + +// visitCallInst - This converts all LLVM call instructions into invoke +// instructions. 
The except part of the invoke goes to the "LongJmpBlkPre" +// that grabs the exception and proceeds to determine if it's a longjmp +// exception or not. +void LowerSetJmp::visitCallInst(CallInst& CI) +{ + if (CI.getCalledFunction()) + if (!IsTransformableFunction(CI.getCalledFunction()->getName()) || + CI.getCalledFunction()->isIntrinsic()) return; + + BasicBlock* OldBB = CI.getParent(); + + // If not reachable from a setjmp call, don't transform. + if (!DFSBlocks.count(OldBB)) return; + + BasicBlock* NewBB = OldBB->splitBasicBlock(CI); + assert(NewBB && "Couldn't split BB of \"call\" instruction!!"); + DFSBlocks.insert(NewBB); + NewBB->setName("Call2Invoke"); + + Function* Func = OldBB->getParent(); + + // Construct the new "invoke" instruction. + TerminatorInst* Term = OldBB->getTerminator(); + std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end()); + InvokeInst* II = new + InvokeInst(CI.getCalledValue(), NewBB, PrelimBBMap[Func], + &Params[0], Params.size(), CI.getName(), Term); + + // Replace the old call inst with the invoke inst and remove the call. + CI.replaceAllUsesWith(II); + CI.getParent()->getInstList().erase(&CI); + + // The old terminator is useless now that we have the invoke inst. + Term->getParent()->getInstList().erase(Term); + ++CallsTransformed; +} + +// visitInvokeInst - Converting the "invoke" instruction is fairly +// straight-forward. The old exception part is replaced by a query asking +// if this is a longjmp exception. If it is, then it goes to the longjmp +// exception blocks. Otherwise, control is passed the old exception. +void LowerSetJmp::visitInvokeInst(InvokeInst& II) +{ + if (II.getCalledFunction()) + if (!IsTransformableFunction(II.getCalledFunction()->getName()) || + II.getCalledFunction()->isIntrinsic()) return; + + BasicBlock* BB = II.getParent(); + + // If not reachable from a setjmp call, don't transform. 
+ if (!DFSBlocks.count(BB)) return; + + BasicBlock* ExceptBB = II.getUnwindDest(); + + Function* Func = BB->getParent(); + BasicBlock* NewExceptBB = new BasicBlock("InvokeExcept", Func); + BasicBlock::InstListType& InstList = NewExceptBB->getInstList(); + + // If this is a longjmp exception, then branch to the preliminary BB of + // the longjmp exception handling. Otherwise, go to the old exception. + CallInst* IsLJExcept = new CallInst(IsLJException, "IsLJExcept"); + InstList.push_back(IsLJExcept); + + new BranchInst(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB); + + II.setUnwindDest(NewExceptBB); + ++InvokesTransformed; +} + +// visitReturnInst - We want to destroy the setjmp map upon exit from the +// function. +void LowerSetJmp::visitReturnInst(ReturnInst &RI) { + Function* Func = RI.getParent()->getParent(); + new CallInst(DestroySJMap, GetSetJmpMap(Func), "", &RI); +} + +// visitUnwindInst - We want to destroy the setjmp map upon exit from the +// function. +void LowerSetJmp::visitUnwindInst(UnwindInst &UI) { + Function* Func = UI.getParent()->getParent(); + new CallInst(DestroySJMap, GetSetJmpMap(Func), "", &UI); +} + +ModulePass *llvm::createLowerSetJmpPass() { + return new LowerSetJmp(); +} + diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile new file mode 100644 index 0000000..22a76d3 --- /dev/null +++ b/lib/Transforms/IPO/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by the LLVM research group and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMipo +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp new file mode 100644 index 0000000..a783272 --- /dev/null +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -0,0 +1,233 @@ +//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interprocedural pass which walks the +// call-graph, turning invoke instructions into calls, iff the callee cannot +// throw an exception. It implements this as a bottom-up traversal of the +// call-graph. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "prune-eh" +#include "llvm/Transforms/IPO.h" +#include "llvm/CallGraphSCCPass.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include <set> +#include <algorithm> +using namespace llvm; + +STATISTIC(NumRemoved, "Number of invokes removed"); +STATISTIC(NumUnreach, "Number of noreturn calls optimized"); + +namespace { + struct VISIBILITY_HIDDEN PruneEH : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PruneEH() : CallGraphSCCPass((intptr_t)&ID) {} + + /// DoesNotUnwind - This set contains all of the functions which we have + /// determined cannot unwind. 
+ std::set<CallGraphNode*> DoesNotUnwind; + + /// DoesNotReturn - This set contains all of the functions which we have + /// determined cannot return normally (but might unwind). + std::set<CallGraphNode*> DoesNotReturn; + + // runOnSCC - Analyze the SCC, performing the transformation if possible. + bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + + bool SimplifyFunction(Function *F); + void DeleteBasicBlock(BasicBlock *BB); + }; + + char PruneEH::ID = 0; + RegisterPass<PruneEH> X("prune-eh", "Remove unused exception handling info"); +} + +Pass *llvm::createPruneEHPass() { return new PruneEH(); } + + +bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) { + CallGraph &CG = getAnalysis<CallGraph>(); + bool MadeChange = false; + + // First pass, scan all of the functions in the SCC, simplifying them + // according to what we know. + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + if (Function *F = SCC[i]->getFunction()) + MadeChange |= SimplifyFunction(F); + + // Next, check to see if any callees might throw or if there are any external + // functions in this SCC: if so, we cannot prune any functions in this SCC. + // If this SCC includes the unwind instruction, we KNOW it throws, so + // obviously the SCC might throw. + // + bool SCCMightUnwind = false, SCCMightReturn = false; + for (unsigned i = 0, e = SCC.size(); + (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (F == 0 || (F->isDeclaration() && !F->getIntrinsicID())) { + SCCMightUnwind = true; + SCCMightReturn = true; + } else { + if (F->isDeclaration()) + SCCMightReturn = true; + + // Check to see if this function performs an unwind or calls an + // unwinding function. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (isa<UnwindInst>(BB->getTerminator())) { // Uses unwind! 
+ SCCMightUnwind = true; + } else if (isa<ReturnInst>(BB->getTerminator())) { + SCCMightReturn = true; + } + + // Invoke instructions don't allow unwinding to continue, so we are + // only interested in call instructions. + if (!SCCMightUnwind) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *Callee = CI->getCalledFunction()) { + CallGraphNode *CalleeNode = CG[Callee]; + // If the callee is outside our current SCC, or if it is not + // known to throw, then we might throw also. + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()&& + !DoesNotUnwind.count(CalleeNode)) { + SCCMightUnwind = true; + break; + } + } else { + // Indirect call, it might throw. + SCCMightUnwind = true; + break; + } + } + if (SCCMightUnwind && SCCMightReturn) break; + } + } + } + + // If the SCC doesn't unwind or doesn't throw, note this fact. + if (!SCCMightUnwind) + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + DoesNotUnwind.insert(SCC[i]); + if (!SCCMightReturn) + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + DoesNotReturn.insert(SCC[i]); + + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + // Convert any invoke instructions to non-throwing functions in this node + // into call instructions with a branch. This makes the exception blocks + // dead. + if (Function *F = SCC[i]->getFunction()) + MadeChange |= SimplifyFunction(F); + } + + return MadeChange; +} + + +// SimplifyFunction - Given information about callees, simplify the specified +// function if we have invokes to non-unwinding functions or code after calls to +// no-return functions. 
+bool PruneEH::SimplifyFunction(Function *F) { + CallGraph &CG = getAnalysis<CallGraph>(); + bool MadeChange = false; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) + if (Function *F = II->getCalledFunction()) + if (DoesNotUnwind.count(CG[F])) { + SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); + // Insert a call instruction before the invoke. + CallInst *Call = new CallInst(II->getCalledValue(), + &Args[0], Args.size(), "", II); + Call->takeName(II); + Call->setCallingConv(II->getCallingConv()); + + // Anything that used the value produced by the invoke instruction + // now uses the value produced by the call instruction. + II->replaceAllUsesWith(Call); + BasicBlock *UnwindBlock = II->getUnwindDest(); + UnwindBlock->removePredecessor(II->getParent()); + + // Insert a branch to the normal destination right before the + // invoke. + new BranchInst(II->getNormalDest(), II); + + // Finally, delete the invoke instruction! + BB->getInstList().pop_back(); + + // If the unwind block is now dead, nuke it. + if (pred_begin(UnwindBlock) == pred_end(UnwindBlock)) + DeleteBasicBlock(UnwindBlock); // Delete the new BB. + + ++NumRemoved; + MadeChange = true; + } + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) + if (CallInst *CI = dyn_cast<CallInst>(I++)) + if (Function *Callee = CI->getCalledFunction()) + if (DoesNotReturn.count(CG[Callee]) && !isa<UnreachableInst>(I)) { + // This call calls a function that cannot return. Insert an + // unreachable instruction after it and simplify the code. Do this + // by splitting the BB, adding the unreachable, then deleting the + // new BB. + BasicBlock *New = BB->splitBasicBlock(I); + + // Remove the uncond branch and add an unreachable. + BB->getInstList().pop_back(); + new UnreachableInst(BB); + + DeleteBasicBlock(New); // Delete the new BB. 
+ MadeChange = true; + ++NumUnreach; + break; + } + + } + return MadeChange; +} + +/// DeleteBasicBlock - remove the specified basic block from the program, +/// updating the callgraph to reflect any now-obsolete edges due to calls that +/// exist in the BB. +void PruneEH::DeleteBasicBlock(BasicBlock *BB) { + assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!"); + CallGraph &CG = getAnalysis<CallGraph>(); + + CallGraphNode *CGN = CG[BB->getParent()]; + for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { + --I; + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *Callee = CI->getCalledFunction()) + CGN->removeCallEdgeTo(CG[Callee]); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) { + if (Function *Callee = II->getCalledFunction()) + CGN->removeCallEdgeTo(CG[Callee]); + } + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + + // Get the list of successors of this block. + std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); + + BB->eraseFromParent(); +} diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp new file mode 100644 index 0000000..5d2d9dd --- /dev/null +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -0,0 +1,249 @@ +//===- RaiseAllocations.cpp - Convert %malloc & %free calls to insts ------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RaiseAllocations pass which convert malloc and free +// calls to malloc and free instructions. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "raiseallocs" +#include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumRaised, "Number of allocations raised"); + +namespace { + // RaiseAllocations - Turn %malloc and %free calls into the appropriate + // instruction. + // + class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass { + Function *MallocFunc; // Functions in the module we are processing + Function *FreeFunc; // Initialized by doPassInitializationVirt + public: + static char ID; // Pass identification, replacement for typeid + RaiseAllocations() + : ModulePass((intptr_t)&ID), MallocFunc(0), FreeFunc(0) {} + + // doPassInitialization - For the raise allocations pass, this finds a + // declaration for malloc and free if they exist. + // + void doInitialization(Module &M); + + // run - This method does the actual work of converting instructions over. + // + bool runOnModule(Module &M); + }; + + char RaiseAllocations::ID = 0; + RegisterPass<RaiseAllocations> + X("raiseallocs", "Raise allocations from calls to instructions"); +} // end anonymous namespace + + +// createRaiseAllocationsPass - The interface to this file... +ModulePass *llvm::createRaiseAllocationsPass() { + return new RaiseAllocations(); +} + + +// If the module has a symbol table, they might be referring to the malloc and +// free functions. If this is the case, grab the method pointers that the +// module is using. +// +// Lookup %malloc and %free in the symbol table, for later use. If they don't +// exist, or are not external, we do not worry about converting calls to that +// function into the appropriate instruction. 
+//
+void RaiseAllocations::doInitialization(Module &M) {
+
+ // Get Malloc and free prototypes if they exist!
+ MallocFunc = M.getFunction("malloc");
+ if (MallocFunc) {
+ const FunctionType* TyWeHave = MallocFunc->getFunctionType();
+
+ // Get the expected prototype for malloc
+ const FunctionType *Malloc1Type =
+ FunctionType::get(PointerType::get(Type::Int8Ty),
+ std::vector<const Type*>(1, Type::Int64Ty), false);
+
+ // Check to see if we got the expected malloc
+ if (TyWeHave != Malloc1Type) {
+ // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // This handles the common declaration of: 'void *malloc(unsigned);'
+ const FunctionType *Malloc2Type =
+ FunctionType::get(PointerType::get(Type::Int8Ty),
+ std::vector<const Type*>(1, Type::Int32Ty), false);
+ if (TyWeHave != Malloc2Type) {
+ // Check to see if the prototype is missing, giving us
+ // sbyte*(...) * malloc
+ // This handles the common declaration of: 'void *malloc();'
+ const FunctionType *Malloc3Type =
+ FunctionType::get(PointerType::get(Type::Int8Ty),
+ std::vector<const Type*>(), true);
+ if (TyWeHave != Malloc3Type)
+ // Give up
+ MallocFunc = 0;
+ }
+ }
+ }
+
+ FreeFunc = M.getFunction("free");
+ if (FreeFunc) {
+ const FunctionType* TyWeHave = FreeFunc->getFunctionType();
+
+ // Get the expected prototype for void free(i8*)
+ const FunctionType *Free1Type = FunctionType::get(Type::VoidTy,
+ std::vector<const Type*>(1, PointerType::get(Type::Int8Ty)), false);
+
+ if (TyWeHave != Free1Type) {
+ // Check to see if the prototype was forgotten, giving us
+ // void (...) * free
+ // This handles the common forward declaration of: 'void free();'
+ const FunctionType* Free2Type = FunctionType::get(Type::VoidTy,
+ std::vector<const Type*>(),true);
+
+ if (TyWeHave != Free2Type) {
+ // One last try, check to see if we can find free as
+ // int (...)* free. This handles the case where NOTHING was declared.
+ const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, + std::vector<const Type*>(),true); + + if (TyWeHave != Free3Type) { + // Give up. + FreeFunc = 0; + } + } + } + } + + // Don't mess with locally defined versions of these functions... + if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0; + if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0; +} + +// run - Transform calls into instructions... +// +bool RaiseAllocations::runOnModule(Module &M) { + // Find the malloc/free prototypes... + doInitialization(M); + + bool Changed = false; + + // First, process all of the malloc calls... + if (MallocFunc) { + std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end()); + std::vector<Value*> EqPointers; // Values equal to MallocFunc + while (!Users.empty()) { + User *U = Users.back(); + Users.pop_back(); + + if (Instruction *I = dyn_cast<Instruction>(U)) { + CallSite CS = CallSite::get(I); + if (CS.getInstruction() && CS.arg_begin() != CS.arg_end() && + (CS.getCalledFunction() == MallocFunc || + std::find(EqPointers.begin(), EqPointers.end(), + CS.getCalledValue()) != EqPointers.end())) { + + Value *Source = *CS.arg_begin(); + + // If no prototype was provided for malloc, we may need to cast the + // source size. + if (Source->getType() != Type::Int32Ty) + Source = + CastInst::createIntegerCast(Source, Type::Int32Ty, false/*ZExt*/, + "MallocAmtCast", I); + + MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I); + MI->takeName(I); + I->replaceAllUsesWith(MI); + + // If the old instruction was an invoke, add an unconditional branch + // before the invoke, which will become the new terminator. 
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + new BranchInst(II->getNormalDest(), I); + + // Delete the old call site + MI->getParent()->getInstList().erase(I); + Changed = true; + ++NumRaised; + } + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) { + Users.insert(Users.end(), GV->use_begin(), GV->use_end()); + EqPointers.push_back(GV); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->isCast()) { + Users.insert(Users.end(), CE->use_begin(), CE->use_end()); + EqPointers.push_back(CE); + } + } + } + } + + // Next, process all free calls... + if (FreeFunc) { + std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end()); + std::vector<Value*> EqPointers; // Values equal to FreeFunc + + while (!Users.empty()) { + User *U = Users.back(); + Users.pop_back(); + + if (Instruction *I = dyn_cast<Instruction>(U)) { + CallSite CS = CallSite::get(I); + if (CS.getInstruction() && CS.arg_begin() != CS.arg_end() && + (CS.getCalledFunction() == FreeFunc || + std::find(EqPointers.begin(), EqPointers.end(), + CS.getCalledValue()) != EqPointers.end())) { + + // If no prototype was provided for free, we may need to cast the + // source pointer. This should be really uncommon, but it's necessary + // just in case we are dealing with weird code like this: + // free((long)ptr); + // + Value *Source = *CS.arg_begin(); + if (!isa<PointerType>(Source->getType())) + Source = new IntToPtrInst(Source, PointerType::get(Type::Int8Ty), + "FreePtrCast", I); + new FreeInst(Source, I); + + // If the old instruction was an invoke, add an unconditional branch + // before the invoke, which will become the new terminator. 
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + new BranchInst(II->getNormalDest(), I); + + // Delete the old call site + if (I->getType() != Type::VoidTy) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + Changed = true; + ++NumRaised; + } + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) { + Users.insert(Users.end(), GV->use_begin(), GV->use_end()); + EqPointers.push_back(GV); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->isCast()) { + Users.insert(Users.end(), CE->use_begin(), CE->use_end()); + EqPointers.push_back(CE); + } + } + } + } + + return Changed; +} diff --git a/lib/Transforms/IPO/SimplifyLibCalls.cpp b/lib/Transforms/IPO/SimplifyLibCalls.cpp new file mode 100644 index 0000000..b0f9128 --- /dev/null +++ b/lib/Transforms/IPO/SimplifyLibCalls.cpp @@ -0,0 +1,2021 @@ +//===- SimplifyLibCalls.cpp - Optimize specific well-known library calls --===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a module pass that applies a variety of small +// optimizations for calls to specific well-known function calls (e.g. runtime +// library functions). For example, a call to the function "exit(3)" that +// occurs within the main() function can be transformed into a simple "return 3" +// instruction. Any optimization that takes this form (replace call to library +// function with simpler code that provides the same result) belongs in this +// file. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplify-libcalls"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/hash_map"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+/// This statistic keeps track of the total number of library calls that have
+/// been simplified regardless of which call it is.
+STATISTIC(SimplifiedLibCalls, "Number of library calls simplified");
+
+namespace {
+ // Forward declarations
+ class LibCallOptimization;
+ class SimplifyLibCalls;
+
+/// This list is populated by the constructor for LibCallOptimization class.
+/// Therefore all subclasses are registered here at static initialization time
+/// and this list is what the SimplifyLibCalls pass uses to apply the individual
+/// optimizations to the call sites.
+/// @brief The list of optimizations deriving from LibCallOptimization
+static LibCallOptimization *OptList = 0;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call. The SimplifyLibCalls pass will call the
+/// ValidateCalledFunction method to ask the optimization if a given Function
+/// is the kind that the optimization can handle. If the subclass returns true,
+/// then SimplifyLibCalls will also call the OptimizeCall method to perform,
+/// or attempt to perform, the optimization(s) for the library call. Otherwise,
+/// OptimizeCall won't be called. Subclasses are responsible for providing the
+/// name of the library call (strlen, strcpy, etc.) to the LibCallOptimization
+/// constructor. This is used to efficiently select which call instructions to
+/// optimize.
The criteria for a "lib call" is "anything with well known
+/// semantics", typically a library function that is defined by an international
+/// standard. Because the semantics are well known, the optimizations can
+/// generally short-circuit actually calling the function if there's a simpler
+/// way (e.g. strlen(X) can be reduced to a constant if X is a constant global).
+/// @brief Base class for library call optimizations
+class VISIBILITY_HIDDEN LibCallOptimization {
+ LibCallOptimization **Prev, *Next;
+ const char *FunctionName; ///< Name of the library call we optimize
+#ifndef NDEBUG
+ Statistic occurrences; ///< debug statistic (-debug-only=simplify-libcalls)
+#endif
+public:
+ /// The \p fname argument must be the name of the library function being
+ /// optimized by the subclass.
+ /// @brief Constructor that registers the optimization.
+ LibCallOptimization(const char *FName, const char *Description)
+ : FunctionName(FName) {
+
+#ifndef NDEBUG
+ occurrences.construct("simplify-libcalls", Description);
+#endif
+ // Register this optimizer in the list of optimizations.
+ Next = OptList;
+ OptList = this;
+ Prev = &OptList;
+ if (Next) Next->Prev = &Next;
+ }
+
+ /// getNext - All libcall optimizations are chained together into a list,
+ /// return the next one in the list.
+ LibCallOptimization *getNext() { return Next; }
+
+ /// @brief Deregister from the optlist
+ virtual ~LibCallOptimization() {
+ *Prev = Next;
+ if (Next) Next->Prev = Prev;
+ }
+
+ /// The implementation of this function in subclasses should determine if
+ /// \p F is suitable for the optimization. This method is called by
+ /// SimplifyLibCalls::runOnModule to short circuit visiting all the call
+ /// sites of such a function if that function is not suitable in the first
+ /// place. If the called function is suitable, this method should return true;
+ /// false, otherwise.
This function should also perform any lazy + /// initialization that the LibCallOptimization needs to do, if its to return + /// true. This avoids doing initialization until the optimizer is actually + /// going to be called upon to do some optimization. + /// @brief Determine if the function is suitable for optimization + virtual bool ValidateCalledFunction( + const Function* F, ///< The function that is the target of call sites + SimplifyLibCalls& SLC ///< The pass object invoking us + ) = 0; + + /// The implementations of this function in subclasses is the heart of the + /// SimplifyLibCalls algorithm. Sublcasses of this class implement + /// OptimizeCall to determine if (a) the conditions are right for optimizing + /// the call and (b) to perform the optimization. If an action is taken + /// against ci, the subclass is responsible for returning true and ensuring + /// that ci is erased from its parent. + /// @brief Optimize a call, if possible. + virtual bool OptimizeCall( + CallInst* ci, ///< The call instruction that should be optimized. + SimplifyLibCalls& SLC ///< The pass object invoking us + ) = 0; + + /// @brief Get the name of the library call being optimized + const char *getFunctionName() const { return FunctionName; } + + bool ReplaceCallWith(CallInst *CI, Value *V) { + if (!CI->use_empty()) + CI->replaceAllUsesWith(V); + CI->eraseFromParent(); + return true; + } + + /// @brief Called by SimplifyLibCalls to update the occurrences statistic. + void succeeded() { +#ifndef NDEBUG + DEBUG(++occurrences); +#endif + } +}; + +/// This class is an LLVM Pass that applies each of the LibCallOptimization +/// instances to all the call sites in a module, relatively efficiently. The +/// purpose of this pass is to provide optimizations for calls to well-known +/// functions with well-known semantics, such as those in the c library. The +/// class provides the basic infrastructure for handling runOnModule. 
Whenever +/// this pass finds a function call, it asks the appropriate optimizer to +/// validate the call (ValidateLibraryCall). If it is validated, then +/// the OptimizeCall method is also called. +/// @brief A ModulePass for optimizing well-known function calls. +class VISIBILITY_HIDDEN SimplifyLibCalls : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + SimplifyLibCalls() : ModulePass((intptr_t)&ID) {} + + /// We need some target data for accurate signature details that are + /// target dependent. So we require target data in our AnalysisUsage. + /// @brief Require TargetData from AnalysisUsage. + virtual void getAnalysisUsage(AnalysisUsage& Info) const { + // Ask that the TargetData analysis be performed before us so we can use + // the target data. + Info.addRequired<TargetData>(); + } + + /// For this pass, process all of the function calls in the module, calling + /// ValidateLibraryCall and OptimizeCall as appropriate. + /// @brief Run all the lib call optimizations on a Module. + virtual bool runOnModule(Module &M) { + reset(M); + + bool result = false; + hash_map<std::string, LibCallOptimization*> OptznMap; + for (LibCallOptimization *Optzn = OptList; Optzn; Optzn = Optzn->getNext()) + OptznMap[Optzn->getFunctionName()] = Optzn; + + // The call optimizations can be recursive. That is, the optimization might + // generate a call to another function which can also be optimized. This way + // we make the LibCallOptimization instances very specific to the case they + // handle. It also means we need to keep running over the function calls in + // the module until we don't get any more optimizations possible. 
+ bool found_optimization = false; + do { + found_optimization = false; + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + // All the "well-known" functions are external and have external linkage + // because they live in a runtime library somewhere and were (probably) + // not compiled by LLVM. So, we only act on external functions that + // have external or dllimport linkage and non-empty uses. + if (!FI->isDeclaration() || + !(FI->hasExternalLinkage() || FI->hasDLLImportLinkage()) || + FI->use_empty()) + continue; + + // Get the optimization class that pertains to this function + hash_map<std::string, LibCallOptimization*>::iterator OMI = + OptznMap.find(FI->getName()); + if (OMI == OptznMap.end()) continue; + + LibCallOptimization *CO = OMI->second; + + // Make sure the called function is suitable for the optimization + if (!CO->ValidateCalledFunction(FI, *this)) + continue; + + // Loop over each of the uses of the function + for (Value::use_iterator UI = FI->use_begin(), UE = FI->use_end(); + UI != UE ; ) { + // If the use of the function is a call instruction + if (CallInst* CI = dyn_cast<CallInst>(*UI++)) { + // Do the optimization on the LibCallOptimization. + if (CO->OptimizeCall(CI, *this)) { + ++SimplifiedLibCalls; + found_optimization = result = true; + CO->succeeded(); + } + } + } + } + } while (found_optimization); + + return result; + } + + /// @brief Return the *current* module we're working on. + Module* getModule() const { return M; } + + /// @brief Return the *current* target data for the module we're working on. 
+ TargetData* getTargetData() const { return TD; } + + /// @brief Return the size_t type -- syntactic shortcut + const Type* getIntPtrType() const { return TD->getIntPtrType(); } + + /// @brief Return a Function* for the putchar libcall + Constant *get_putchar() { + if (!putchar_func) + putchar_func = + M->getOrInsertFunction("putchar", Type::Int32Ty, Type::Int32Ty, NULL); + return putchar_func; + } + + /// @brief Return a Function* for the puts libcall + Constant *get_puts() { + if (!puts_func) + puts_func = M->getOrInsertFunction("puts", Type::Int32Ty, + PointerType::get(Type::Int8Ty), + NULL); + return puts_func; + } + + /// @brief Return a Function* for the fputc libcall + Constant *get_fputc(const Type* FILEptr_type) { + if (!fputc_func) + fputc_func = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, + FILEptr_type, NULL); + return fputc_func; + } + + /// @brief Return a Function* for the fputs libcall + Constant *get_fputs(const Type* FILEptr_type) { + if (!fputs_func) + fputs_func = M->getOrInsertFunction("fputs", Type::Int32Ty, + PointerType::get(Type::Int8Ty), + FILEptr_type, NULL); + return fputs_func; + } + + /// @brief Return a Function* for the fwrite libcall + Constant *get_fwrite(const Type* FILEptr_type) { + if (!fwrite_func) + fwrite_func = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), + PointerType::get(Type::Int8Ty), + TD->getIntPtrType(), + TD->getIntPtrType(), + FILEptr_type, NULL); + return fwrite_func; + } + + /// @brief Return a Function* for the sqrt libcall + Constant *get_sqrt() { + if (!sqrt_func) + sqrt_func = M->getOrInsertFunction("sqrt", Type::DoubleTy, + Type::DoubleTy, NULL); + return sqrt_func; + } + + /// @brief Return a Function* for the strcpy libcall + Constant *get_strcpy() { + if (!strcpy_func) + strcpy_func = M->getOrInsertFunction("strcpy", + PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + NULL); + return strcpy_func; + } + + /// @brief Return a 
Function* for the strlen libcall + Constant *get_strlen() { + if (!strlen_func) + strlen_func = M->getOrInsertFunction("strlen", TD->getIntPtrType(), + PointerType::get(Type::Int8Ty), + NULL); + return strlen_func; + } + + /// @brief Return a Function* for the memchr libcall + Constant *get_memchr() { + if (!memchr_func) + memchr_func = M->getOrInsertFunction("memchr", + PointerType::get(Type::Int8Ty), + PointerType::get(Type::Int8Ty), + Type::Int32Ty, TD->getIntPtrType(), + NULL); + return memchr_func; + } + + /// @brief Return a Function* for the memcpy libcall + Constant *get_memcpy() { + if (!memcpy_func) { + const Type *SBP = PointerType::get(Type::Int8Ty); + const char *N = TD->getIntPtrType() == Type::Int32Ty ? + "llvm.memcpy.i32" : "llvm.memcpy.i64"; + memcpy_func = M->getOrInsertFunction(N, Type::VoidTy, SBP, SBP, + TD->getIntPtrType(), Type::Int32Ty, + NULL); + } + return memcpy_func; + } + + Constant *getUnaryFloatFunction(const char *Name, Constant *&Cache) { + if (!Cache) + Cache = M->getOrInsertFunction(Name, Type::FloatTy, Type::FloatTy, NULL); + return Cache; + } + + Constant *get_floorf() { return getUnaryFloatFunction("floorf", floorf_func);} + Constant *get_ceilf() { return getUnaryFloatFunction( "ceilf", ceilf_func);} + Constant *get_roundf() { return getUnaryFloatFunction("roundf", roundf_func);} + Constant *get_rintf() { return getUnaryFloatFunction( "rintf", rintf_func);} + Constant *get_nearbyintf() { return getUnaryFloatFunction("nearbyintf", + nearbyintf_func); } +private: + /// @brief Reset our cached data for a new Module + void reset(Module& mod) { + M = &mod; + TD = &getAnalysis<TargetData>(); + putchar_func = 0; + puts_func = 0; + fputc_func = 0; + fputs_func = 0; + fwrite_func = 0; + memcpy_func = 0; + memchr_func = 0; + sqrt_func = 0; + strcpy_func = 0; + strlen_func = 0; + floorf_func = 0; + ceilf_func = 0; + roundf_func = 0; + rintf_func = 0; + nearbyintf_func = 0; + } + +private: + /// Caches for function pointers. 
+ Constant *putchar_func, *puts_func; + Constant *fputc_func, *fputs_func, *fwrite_func; + Constant *memcpy_func, *memchr_func; + Constant *sqrt_func; + Constant *strcpy_func, *strlen_func; + Constant *floorf_func, *ceilf_func, *roundf_func; + Constant *rintf_func, *nearbyintf_func; + Module *M; ///< Cached Module + TargetData *TD; ///< Cached TargetData +}; + +char SimplifyLibCalls::ID = 0; +// Register the pass +RegisterPass<SimplifyLibCalls> +X("simplify-libcalls", "Simplify well-known library calls"); + +} // anonymous namespace + +// The only public symbol in this file which just instantiates the pass object +ModulePass *llvm::createSimplifyLibCallsPass() { + return new SimplifyLibCalls(); +} + +// Classes below here, in the anonymous namespace, are all subclasses of the +// LibCallOptimization class, each implementing all optimizations possible for a +// single well-known library call. Each has a static singleton instance that +// auto registers it into the "optlist" global above. +namespace { + +// Forward declare utility functions. +static bool GetConstantStringInfo(Value *V, std::string &Str); +static Value *CastToCStr(Value *V, Instruction *IP); + +/// This LibCallOptimization will find instances of a call to "exit" that occurs +/// within the "main" function and change it to a simple "ret" instruction with +/// the same value passed to the exit function. When this is done, it splits the +/// basic block at the exit(3) call and deletes the call instruction. +/// @brief Replace calls to exit in main with a simple return +struct VISIBILITY_HIDDEN ExitInMainOptimization : public LibCallOptimization { + ExitInMainOptimization() : LibCallOptimization("exit", + "Number of 'exit' calls simplified") {} + + // Make sure the called function looks like exit (int argument, int return + // type, external linkage, not varargs). 
+ virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + return F->arg_size() >= 1 && F->arg_begin()->getType()->isInteger(); + } + + virtual bool OptimizeCall(CallInst* ci, SimplifyLibCalls& SLC) { + // To be careful, we check that the call to exit is coming from "main", that + // main has external linkage, and the return type of main and the argument + // to exit have the same type. + Function *from = ci->getParent()->getParent(); + if (from->hasExternalLinkage()) + if (from->getReturnType() == ci->getOperand(1)->getType()) + if (from->getName() == "main") { + // Okay, time to actually do the optimization. First, get the basic + // block of the call instruction + BasicBlock* bb = ci->getParent(); + + // Create a return instruction that we'll replace the call with. + // Note that the argument of the return is the argument of the call + // instruction. + new ReturnInst(ci->getOperand(1), ci); + + // Split the block at the call instruction which places it in a new + // basic block. + bb->splitBasicBlock(ci); + + // The block split caused a branch instruction to be inserted into + // the end of the original block, right after the return instruction + // that we put there. That's not a valid block, so delete the branch + // instruction. + bb->getInstList().pop_back(); + + // Now we can finally get rid of the call instruction which now lives + // in the new basic block. + ci->eraseFromParent(); + + // Optimization succeeded, return true. + return true; + } + // We didn't pass the criteria for this optimization so return false + return false; + } +} ExitInMainOptimizer; + +/// This LibCallOptimization will simplify a call to the strcat library +/// function. The simplification is possible only if the string being +/// concatenated is a constant array or a constant expression that results in +/// a constant string. In this case we can replace it with strlen + llvm.memcpy +/// of the constant string. 
Both of these calls are further reduced, if possible +/// on subsequent passes. +/// @brief Simplify the strcat library function. +struct VISIBILITY_HIDDEN StrCatOptimization : public LibCallOptimization { +public: + /// @brief Default constructor + StrCatOptimization() : LibCallOptimization("strcat", + "Number of 'strcat' calls simplified") {} + +public: + + /// @brief Make sure that the "strcat" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getNumParams() == 2 && + FT->getReturnType() == PointerType::get(Type::Int8Ty) && + FT->getParamType(0) == FT->getReturnType() && + FT->getParamType(1) == FT->getReturnType(); + } + + /// @brief Optimize the strcat library function + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // Extract some information from the instruction + Value *Dst = CI->getOperand(1); + Value *Src = CI->getOperand(2); + + // Extract the initializer (while making numerous checks) from the + // source operand of the call to strcat. + std::string SrcStr; + if (!GetConstantStringInfo(Src, SrcStr)) + return false; + + // Handle the simple, do-nothing case + if (SrcStr.empty()) + return ReplaceCallWith(CI, Dst); + + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + CallInst *DstLen = new CallInst(SLC.get_strlen(), Dst, + Dst->getName()+".len", CI); + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Dst = new GetElementPtrInst(Dst, DstLen, Dst->getName()+".indexed", CI); + + // We have enough information to now generate the memcpy call to + // do the concatenation for us. + Value *Vals[] = { + Dst, Src, + ConstantInt::get(SLC.getIntPtrType(), SrcStr.size()+1), // copy nul byte. 
+ ConstantInt::get(Type::Int32Ty, 1) // alignment + }; + new CallInst(SLC.get_memcpy(), Vals, 4, "", CI); + + return ReplaceCallWith(CI, Dst); + } +} StrCatOptimizer; + +/// This LibCallOptimization will simplify a call to the strchr library +/// function. It optimizes out cases where the arguments are both constant +/// and the result can be determined statically. +/// @brief Simplify the strcmp library function. +struct VISIBILITY_HIDDEN StrChrOptimization : public LibCallOptimization { +public: + StrChrOptimization() : LibCallOptimization("strchr", + "Number of 'strchr' calls simplified") {} + + /// @brief Make sure that the "strchr" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getNumParams() == 2 && + FT->getReturnType() == PointerType::get(Type::Int8Ty) && + FT->getParamType(0) == FT->getReturnType() && + isa<IntegerType>(FT->getParamType(1)); + } + + /// @brief Perform the strchr optimizations + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // Check that the first argument to strchr is a constant array of sbyte. + std::string Str; + if (!GetConstantStringInfo(CI->getOperand(1), Str)) + return false; + + // If the second operand is not constant, just lower this to memchr since we + // know the length of the input string. + ConstantInt *CSI = dyn_cast<ConstantInt>(CI->getOperand(2)); + if (!CSI) { + Value *Args[3] = { + CI->getOperand(1), + CI->getOperand(2), + ConstantInt::get(SLC.getIntPtrType(), Str.size()+1) + }; + return ReplaceCallWith(CI, new CallInst(SLC.get_memchr(), Args, 3, + CI->getName(), CI)); + } + + // strchr can find the nul character. + Str += '\0'; + + // Get the character we're looking for + char CharValue = CSI->getSExtValue(); + + // Compute the offset + uint64_t i = 0; + while (1) { + if (i == Str.size()) // Didn't find the char. strchr returns null. 
+ return ReplaceCallWith(CI, Constant::getNullValue(CI->getType())); + // Did we find our match? + if (Str[i] == CharValue) + break; + ++i; + } + + // strchr(s+n,c) -> gep(s+n+i,c) + // (if c is a constant integer and s is a constant string) + Value *Idx = ConstantInt::get(Type::Int64Ty, i); + Value *GEP = new GetElementPtrInst(CI->getOperand(1), Idx, + CI->getOperand(1)->getName() + + ".strchr", CI); + return ReplaceCallWith(CI, GEP); + } +} StrChrOptimizer; + +/// This LibCallOptimization will simplify a call to the strcmp library +/// function. It optimizes out cases where one or both arguments are constant +/// and the result can be determined statically. +/// @brief Simplify the strcmp library function. +struct VISIBILITY_HIDDEN StrCmpOptimization : public LibCallOptimization { +public: + StrCmpOptimization() : LibCallOptimization("strcmp", + "Number of 'strcmp' calls simplified") {} + + /// @brief Make sure that the "strcmp" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getReturnType() == Type::Int32Ty && FT->getNumParams() == 2 && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == PointerType::get(Type::Int8Ty); + } + + /// @brief Perform the strcmp optimization + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // First, check to see if src and destination are the same. If they are, + // then the optimization is to replace the CallInst with a constant 0 + // because the call is a no-op. 
+ Value *Str1P = CI->getOperand(1); + Value *Str2P = CI->getOperand(2); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0)); + + std::string Str1; + if (!GetConstantStringInfo(Str1P, Str1)) + return false; + if (Str1.empty()) { + // strcmp("", x) -> *x + Value *V = new LoadInst(Str2P, CI->getName()+".load", CI); + V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI); + return ReplaceCallWith(CI, V); + } + + std::string Str2; + if (!GetConstantStringInfo(Str2P, Str2)) + return false; + if (Str2.empty()) { + // strcmp(x,"") -> *x + Value *V = new LoadInst(Str1P, CI->getName()+".load", CI); + V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI); + return ReplaceCallWith(CI, V); + } + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + int R = strcmp(Str1.c_str(), Str2.c_str()); + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), R)); + } +} StrCmpOptimizer; + +/// This LibCallOptimization will simplify a call to the strncmp library +/// function. It optimizes out cases where one or both arguments are constant +/// and the result can be determined statically. +/// @brief Simplify the strncmp library function. 
+struct VISIBILITY_HIDDEN StrNCmpOptimization : public LibCallOptimization { +public: + StrNCmpOptimization() : LibCallOptimization("strncmp", + "Number of 'strncmp' calls simplified") {} + + /// @brief Make sure that the "strncmp" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getReturnType() == Type::Int32Ty && FT->getNumParams() == 3 && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == PointerType::get(Type::Int8Ty) && + isa<IntegerType>(FT->getParamType(2)); + return false; + } + + /// @brief Perform the strncmp optimization + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // First, check to see if src and destination are the same. If they are, + // then the optimization is to replace the CallInst with a constant 0 + // because the call is a no-op. + Value *Str1P = CI->getOperand(1); + Value *Str2P = CI->getOperand(2); + if (Str1P == Str2P) // strncmp(x,x, n) -> 0 + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0)); + + // Check the length argument, if it is Constant zero then the strings are + // considered equal. 
+ uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + Length = LengthArg->getZExtValue(); + else + return false; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0)); + + std::string Str1; + if (!GetConstantStringInfo(Str1P, Str1)) + return false; + if (Str1.empty()) { + // strncmp("", x, n) -> *x + Value *V = new LoadInst(Str2P, CI->getName()+".load", CI); + V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI); + return ReplaceCallWith(CI, V); + } + + std::string Str2; + if (!GetConstantStringInfo(Str2P, Str2)) + return false; + if (Str2.empty()) { + // strncmp(x, "", n) -> *x + Value *V = new LoadInst(Str1P, CI->getName()+".load", CI); + V = new ZExtInst(V, CI->getType(), CI->getName()+".int", CI); + return ReplaceCallWith(CI, V); + } + + // strncmp(x, y, n) -> cnst (if both x and y are constant strings) + int R = strncmp(Str1.c_str(), Str2.c_str(), Length); + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), R)); + } +} StrNCmpOptimizer; + +/// This LibCallOptimization will simplify a call to the strcpy library +/// function. Two optimizations are possible: +/// (1) If src and dest are the same and not volatile, just return dest +/// (2) If the src is a constant then we can convert to llvm.memmove +/// @brief Simplify the strcpy library function. 
+struct VISIBILITY_HIDDEN StrCpyOptimization : public LibCallOptimization { +public: + StrCpyOptimization() : LibCallOptimization("strcpy", + "Number of 'strcpy' calls simplified") {} + + /// @brief Make sure that the "strcpy" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getNumParams() == 2 && + FT->getParamType(0) == FT->getParamType(1) && + FT->getReturnType() == FT->getParamType(0) && + FT->getParamType(0) == PointerType::get(Type::Int8Ty); + } + + /// @brief Perform the strcpy optimization + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // First, check to see if src and destination are the same. If they are, + // then the optimization is to replace the CallInst with the destination + // because the call is a no-op. Note that this corresponds to the + // degenerate strcpy(X,X) case which should have "undefined" results + // according to the C specification. However, it occurs sometimes and + // we optimize it as a no-op. + Value *Dst = CI->getOperand(1); + Value *Src = CI->getOperand(2); + if (Dst == Src) { + // strcpy(x, x) -> x + return ReplaceCallWith(CI, Dst); + } + + // Get the length of the constant string referenced by the Src operand. + std::string SrcStr; + if (!GetConstantStringInfo(Src, SrcStr)) + return false; + + // If the constant string's length is zero we can optimize this by just + // doing a store of 0 at the first byte of the destination + if (SrcStr.size() == 0) { + new StoreInst(ConstantInt::get(Type::Int8Ty, 0), Dst, CI); + return ReplaceCallWith(CI, Dst); + } + + // We have enough information to now generate the memcpy call to + // do the concatenation for us. + Value *MemcpyOps[] = { + Dst, Src, // Pass length including nul byte. 
+ ConstantInt::get(SLC.getIntPtrType(), SrcStr.size()+1), + ConstantInt::get(Type::Int32Ty, 1) // alignment + }; + new CallInst(SLC.get_memcpy(), MemcpyOps, 4, "", CI); + + return ReplaceCallWith(CI, Dst); + } +} StrCpyOptimizer; + +/// This LibCallOptimization will simplify a call to the strlen library +/// function by replacing it with a constant value if the string provided to +/// it is a constant array. +/// @brief Simplify the strlen library function. +struct VISIBILITY_HIDDEN StrLenOptimization : public LibCallOptimization { + StrLenOptimization() : LibCallOptimization("strlen", + "Number of 'strlen' calls simplified") {} + + /// @brief Make sure that the "strlen" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getNumParams() == 1 && + FT->getParamType(0) == PointerType::get(Type::Int8Ty) && + isa<IntegerType>(FT->getReturnType()); + } + + /// @brief Perform the strlen optimization + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // Make sure we're dealing with an sbyte* here. + Value *Src = CI->getOperand(1); + + // Does the call to strlen have exactly one use? + if (CI->hasOneUse()) { + // Is that single use a icmp operator? + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CI->use_back())) + // Is it compared against a constant integer? + if (ConstantInt *Cst = dyn_cast<ConstantInt>(Cmp->getOperand(1))) { + // If its compared against length 0 with == or != + if (Cst->getZExtValue() == 0 && Cmp->isEquality()) { + // strlen(x) != 0 -> *x != 0 + // strlen(x) == 0 -> *x == 0 + Value *V = new LoadInst(Src, Src->getName()+".first", CI); + V = new ICmpInst(Cmp->getPredicate(), V, + ConstantInt::get(Type::Int8Ty, 0), + Cmp->getName()+".strlen", CI); + Cmp->replaceAllUsesWith(V); + Cmp->eraseFromParent(); + return ReplaceCallWith(CI, 0); // no uses. 
+ } + } + } + + // Get the length of the constant string operand + std::string Str; + if (!GetConstantStringInfo(Src, Str)) + return false; + + // strlen("xyz") -> 3 (for example) + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), Str.size())); + } +} StrLenOptimizer; + +/// IsOnlyUsedInEqualsComparison - Return true if it only matters that the value +/// is equal or not-equal to zero. +static bool IsOnlyUsedInEqualsZeroComparison(Instruction *I) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +/// This memcmpOptimization will simplify a call to the memcmp library +/// function. +struct VISIBILITY_HIDDEN memcmpOptimization : public LibCallOptimization { + /// @brief Default Constructor + memcmpOptimization() + : LibCallOptimization("memcmp", "Number of 'memcmp' calls simplified") {} + + /// @brief Make sure that the "memcmp" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &TD) { + Function::const_arg_iterator AI = F->arg_begin(); + if (F->arg_size() != 3 || !isa<PointerType>(AI->getType())) return false; + if (!isa<PointerType>((++AI)->getType())) return false; + if (!(++AI)->getType()->isInteger()) return false; + if (!F->getReturnType()->isInteger()) return false; + return true; + } + + /// Because of alignment and instruction information that we don't have, we + /// leave the bulk of this to the code generators. + /// + /// Note that we could do much more if we could force alignment on otherwise + /// small aligned allocas, or if we could indicate that loads have a small + /// alignment. 
+ virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &TD) { + Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); + + // If the two operands are the same, return zero. + if (LHS == RHS) { + // memcmp(s,s,x) -> 0 + return ReplaceCallWith(CI, Constant::getNullValue(CI->getType())); + } + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!LenC) return false; + uint64_t Len = LenC->getZExtValue(); + + // If the length is zero, this returns 0. + switch (Len) { + case 0: + // memcmp(s1,s2,0) -> 0 + return ReplaceCallWith(CI, Constant::getNullValue(CI->getType())); + case 1: { + // memcmp(S1,S2,1) -> *(ubyte*)S1 - *(ubyte*)S2 + const Type *UCharPtr = PointerType::get(Type::Int8Ty); + CastInst *Op1Cast = CastInst::create( + Instruction::BitCast, LHS, UCharPtr, LHS->getName(), CI); + CastInst *Op2Cast = CastInst::create( + Instruction::BitCast, RHS, UCharPtr, RHS->getName(), CI); + Value *S1V = new LoadInst(Op1Cast, LHS->getName()+".val", CI); + Value *S2V = new LoadInst(Op2Cast, RHS->getName()+".val", CI); + Value *RV = BinaryOperator::createSub(S1V, S2V, CI->getName()+".diff",CI); + if (RV->getType() != CI->getType()) + RV = CastInst::createIntegerCast(RV, CI->getType(), false, + RV->getName(), CI); + return ReplaceCallWith(CI, RV); + } + case 2: + if (IsOnlyUsedInEqualsZeroComparison(CI)) { + // TODO: IF both are aligned, use a short load/compare. 
+ + // memcmp(S1,S2,2) -> S1[0]-S2[0] | S1[1]-S2[1] iff only ==/!= 0 matters + const Type *UCharPtr = PointerType::get(Type::Int8Ty); + CastInst *Op1Cast = CastInst::create( + Instruction::BitCast, LHS, UCharPtr, LHS->getName(), CI); + CastInst *Op2Cast = CastInst::create( + Instruction::BitCast, RHS, UCharPtr, RHS->getName(), CI); + Value *S1V1 = new LoadInst(Op1Cast, LHS->getName()+".val1", CI); + Value *S2V1 = new LoadInst(Op2Cast, RHS->getName()+".val1", CI); + Value *D1 = BinaryOperator::createSub(S1V1, S2V1, + CI->getName()+".d1", CI); + Constant *One = ConstantInt::get(Type::Int32Ty, 1); + Value *G1 = new GetElementPtrInst(Op1Cast, One, "next1v", CI); + Value *G2 = new GetElementPtrInst(Op2Cast, One, "next2v", CI); + Value *S1V2 = new LoadInst(G1, LHS->getName()+".val2", CI); + Value *S2V2 = new LoadInst(G2, RHS->getName()+".val2", CI); + Value *D2 = BinaryOperator::createSub(S1V2, S2V2, + CI->getName()+".d1", CI); + Value *Or = BinaryOperator::createOr(D1, D2, CI->getName()+".res", CI); + if (Or->getType() != CI->getType()) + Or = CastInst::createIntegerCast(Or, CI->getType(), false /*ZExt*/, + Or->getName(), CI); + return ReplaceCallWith(CI, Or); + } + break; + default: + break; + } + + return false; + } +} memcmpOptimizer; + + +/// This LibCallOptimization will simplify a call to the memcpy library +/// function by expanding it out to a single store of size 0, 1, 2, 4, or 8 +/// bytes depending on the length of the string and the alignment. Additional +/// optimizations are possible in code generation (sequence of immediate store) +/// @brief Simplify the memcpy library function. 
/// This LibCallOptimization simplifies calls to the llvm.memcpy/llvm.memmove
/// intrinsics when the length is a small constant (<= the alignment), turning
/// the whole call into a single load/store pair of a matching integer type.
struct VISIBILITY_HIDDEN LLVMMemCpyMoveOptzn : public LibCallOptimization {
  LLVMMemCpyMoveOptzn(const char* fname, const char* desc)
    : LibCallOptimization(fname, desc) {}

  /// @brief Make sure that the "memcpy" function has the right prototype
  virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& TD) {
    // Just make sure this has 4 arguments per LLVM spec:
    // (dest, src, length, alignment).
    return (f->arg_size() == 4);
  }

  /// Because of alignment and instruction information that we don't have, we
  /// leave the bulk of this to the code generators. The optimization here just
  /// deals with a few degenerate cases where the length of the copy and the
  /// alignment match the sizes of our intrinsic types so we can do a load and
  /// store instead of the memcpy call.
  /// @brief Perform the memcpy optimization.
  virtual bool OptimizeCall(CallInst* ci, SimplifyLibCalls& TD) {
    // Make sure we have constant int values to work with.
    // Operand layout: (0)=callee, (1)=dest, (2)=src, (3)=len, (4)=align.
    ConstantInt* LEN = dyn_cast<ConstantInt>(ci->getOperand(3));
    if (!LEN)
      return false;
    ConstantInt* ALIGN = dyn_cast<ConstantInt>(ci->getOperand(4));
    if (!ALIGN)
      return false;

    // If the length is larger than the alignment, we can't optimize: the
    // load/store we'd emit would require more alignment than is guaranteed.
    uint64_t len = LEN->getZExtValue();
    uint64_t alignment = ALIGN->getZExtValue();
    if (alignment == 0)
      alignment = 1; // Alignment 0 is identity for alignment 1
    if (len > alignment)
      return false;

    // Get the type we will cast to, based on the size of the copy.
    Value* dest = ci->getOperand(1);
    Value* src = ci->getOperand(2);
    const Type* castType = 0;
    switch (len) {
      case 0:
        // memcpy(d,s,0,a) -> no-op; the intrinsic returns void, so there is
        // nothing to replace uses with.
        return ReplaceCallWith(ci, 0);
      case 1: castType = Type::Int8Ty; break;
      case 2: castType = Type::Int16Ty; break;
      case 4: castType = Type::Int32Ty; break;
      case 8: castType = Type::Int64Ty; break;
      default:
        // Non-power-of-two / oversized lengths are left to the code generator.
        return false;
    }

    // Cast source and dest to the right sized primitive and then load/store.
    CastInst* SrcCast = CastInst::create(Instruction::BitCast,
        src, PointerType::get(castType), src->getName()+".cast", ci);
    CastInst* DestCast = CastInst::create(Instruction::BitCast,
        dest, PointerType::get(castType),dest->getName()+".cast", ci);
    LoadInst* LI = new LoadInst(SrcCast,SrcCast->getName()+".val",ci);
    new StoreInst(LI, DestCast, ci);
    return ReplaceCallWith(ci, 0);
  }
};

/// This LibCallOptimization will simplify a call to the memcpy/memmove library
/// functions. One instance per intrinsic name (i32/i64 length variants).
LLVMMemCpyMoveOptzn LLVMMemCpyOptimizer32("llvm.memcpy.i32",
                                          "Number of 'llvm.memcpy' calls simplified");
LLVMMemCpyMoveOptzn LLVMMemCpyOptimizer64("llvm.memcpy.i64",
                                          "Number of 'llvm.memcpy' calls simplified");
LLVMMemCpyMoveOptzn LLVMMemMoveOptimizer32("llvm.memmove.i32",
                                           "Number of 'llvm.memmove' calls simplified");
LLVMMemCpyMoveOptzn LLVMMemMoveOptimizer64("llvm.memmove.i64",
                                           "Number of 'llvm.memmove' calls simplified");

/// This LibCallOptimization will simplify a call to the memset library
/// function by expanding it out to a single store of size 0, 1, 2, 4, or 8
/// bytes depending on the length argument.
struct VISIBILITY_HIDDEN LLVMMemSetOptimization : public LibCallOptimization {
  /// @brief Default Constructor
  LLVMMemSetOptimization(const char *Name) : LibCallOptimization(Name,
      "Number of 'llvm.memset' calls simplified") {}

  /// @brief Make sure that the "memset" function has the right prototype
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &TD) {
    // Just make sure this has 4 arguments per LLVM spec:
    // (dest, fill value, length, alignment).
    return F->arg_size() == 4;
  }

  /// Because of alignment and instruction information that we don't have, we
  /// leave the bulk of this to the code generators. The optimization here just
  /// deals with a few degenerate cases where the length parameter is constant
  /// and the alignment matches the sizes of our intrinsic types so we can do
  /// store instead of the memset call.
  /// @brief Perform the memset optimization.
  virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &TD) {
    // Make sure we have constant int values to work with.
    ConstantInt* LEN = dyn_cast<ConstantInt>(ci->getOperand(3));
    if (!LEN)
      return false;
    ConstantInt* ALIGN = dyn_cast<ConstantInt>(ci->getOperand(4));
    if (!ALIGN)
      return false;

    // Extract the length and alignment.
    uint64_t len = LEN->getZExtValue();
    uint64_t alignment = ALIGN->getZExtValue();

    // Alignment 0 is identity for alignment 1.
    if (alignment == 0)
      alignment = 1;

    // If the length is zero, this is a no-op.
    if (len == 0) {
      // memset(d,c,0,a) -> noop
      return ReplaceCallWith(ci, 0);
    }

    // If the length is larger than the alignment, we can't optimize.
    if (len > alignment)
      return false;

    // Make sure we have a constant byte value to work with so we can extract
    // the value to be filled.
    ConstantInt* FILL = dyn_cast<ConstantInt>(ci->getOperand(2));
    if (!FILL)
      return false;
    if (FILL->getType() != Type::Int8Ty)
      return false;

    // memset(s,c,n) -> store s, c (for n=1,2,4,8)

    // Extract the fill character and replicate it below to fill every byte of
    // the stored integer.
    uint64_t fill_char = FILL->getZExtValue();
    uint64_t fill_value = fill_char;

    // Get the type we will cast to, based on size of memory area to fill, and
    // the value we will store there (fill byte splatted across the width).
    Value* dest = ci->getOperand(1);
    const Type* castType = 0;
    switch (len) {
      case 1:
        castType = Type::Int8Ty;
        break;
      case 2:
        castType = Type::Int16Ty;
        fill_value |= fill_char << 8;
        break;
      case 4:
        castType = Type::Int32Ty;
        fill_value |= fill_char << 8 | fill_char << 16 | fill_char << 24;
        break;
      case 8:
        castType = Type::Int64Ty;
        // fill_char is uint64_t, so the 32..56-bit shifts are well defined.
        fill_value |= fill_char << 8 | fill_char << 16 | fill_char << 24;
        fill_value |= fill_char << 32 | fill_char << 40 | fill_char << 48;
        fill_value |= fill_char << 56;
        break;
      default:
        return false;
    }

    // Cast dest to the right sized primitive and store the splatted value.
    CastInst* DestCast = new BitCastInst(dest, PointerType::get(castType),
                                         dest->getName()+".cast", ci);
    new StoreInst(ConstantInt::get(castType,fill_value),DestCast, ci);
    return ReplaceCallWith(ci, 0);
  }
};

LLVMMemSetOptimization MemSet32Optimizer("llvm.memset.i32");
LLVMMemSetOptimization MemSet64Optimizer("llvm.memset.i64");


/// This LibCallOptimization will simplify calls to the "pow" library
/// function. It looks for cases where the result of pow is well known and
/// substitutes the appropriate value.
/// @brief Simplify the pow library function.
struct VISIBILITY_HIDDEN PowOptimization : public LibCallOptimization {
public:
  /// @brief Default Constructor
  PowOptimization() : LibCallOptimization("pow",
      "Number of 'pow' calls simplified") {}

  /// @brief Make sure that the "pow" function has the right prototype
  virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& SLC){
    // Just make sure this has 2 arguments (base, exponent).
    return (f->arg_size() == 2);
  }

  /// @brief Perform the pow optimization.
  virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &SLC) {
    // Result type comes from the callee's return type (operand 0 is the
    // called function in this IR representation).
    const Type *Ty = cast<Function>(ci->getOperand(0))->getReturnType();
    Value* base = ci->getOperand(1);
    Value* expn = ci->getOperand(2);
    if (ConstantFP *Op1 = dyn_cast<ConstantFP>(base)) {
      double Op1V = Op1->getValue();
      if (Op1V == 1.0) // pow(1.0,x) -> 1.0
        return ReplaceCallWith(ci, ConstantFP::get(Ty, 1.0));
    } else if (ConstantFP* Op2 = dyn_cast<ConstantFP>(expn)) {
      double Op2V = Op2->getValue();
      if (Op2V == 0.0) {
        // pow(x,0.0) -> 1.0
        return ReplaceCallWith(ci, ConstantFP::get(Ty,1.0));
      } else if (Op2V == 0.5) {
        // pow(x,0.5) -> sqrt(x)
        // NOTE(review): not exactly equivalent for x == -0.0 or x == -inf
        // (pow returns +inf for pow(-inf, 0.5)); presumably acceptable here —
        // confirm against the pass's FP-strictness policy.
        CallInst* sqrt_inst = new CallInst(SLC.get_sqrt(), base,
                                           ci->getName()+".pow",ci);
        return ReplaceCallWith(ci, sqrt_inst);
      } else if (Op2V == 1.0) {
        // pow(x,1.0) -> x
        return ReplaceCallWith(ci, base);
      } else if (Op2V == -1.0) {
        // pow(x,-1.0) -> 1.0/x
        Value *div_inst =
          BinaryOperator::createFDiv(ConstantFP::get(Ty, 1.0), base,
                                     ci->getName()+".pow", ci);
        return ReplaceCallWith(ci, div_inst);
      }
    }
    return false; // opt failed
  }
} PowOptimizer;

/// This LibCallOptimization will simplify calls to the "printf" library
/// function. It looks for cases where the result of printf is not used and the
/// operation can be reduced to something simpler.
/// @brief Simplify the printf library function.
struct VISIBILITY_HIDDEN PrintfOptimization : public LibCallOptimization {
public:
  /// @brief Default Constructor
  PrintfOptimization() : LibCallOptimization("printf",
      "Number of 'printf' calls simplified") {}

  /// @brief Make sure that the "printf" function has the right prototype
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    // Just make sure this has at least 1 argument and returns an integer or
    // void type (some code declares printf as returning void).
    const FunctionType *FT = F->getFunctionType();
    return FT->getNumParams() >= 1 &&
           (isa<IntegerType>(FT->getReturnType()) ||
            FT->getReturnType() == Type::VoidTy);
  }

  /// @brief Perform the printf optimization.
  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
    // All the optimizations depend on the format string being a compile-time
    // constant array. Check that now.
    std::string FormatStr;
    if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
      return false;

    // printf("") prints nothing: delete the call and replace the result
    // (number of characters printed) with 0.
    if (FormatStr.empty()) {
      // Tolerate printf's declared void.
      if (CI->use_empty()) return ReplaceCallWith(CI, 0);
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0));
    }

    if (FormatStr.size() == 1) {
      // Turn this into a putchar call, even if it is a %.
      Value *V = ConstantInt::get(Type::Int32Ty, FormatStr[0]);
      new CallInst(SLC.get_putchar(), V, "", CI);
      if (CI->use_empty()) return ReplaceCallWith(CI, 0);
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
    }

    // Check to see if the format str is something like "foo\n", in which case
    // we convert it to a puts call. We don't allow it to contain any format
    // characters.
    if (FormatStr[FormatStr.size()-1] == '\n' &&
        FormatStr.find('%') == std::string::npos) {
      // Create a string literal with no \n on it. We expect the constant merge
      // pass to be run after this pass, to merge duplicate strings.
      FormatStr.erase(FormatStr.end()-1);
      Constant *Init = ConstantArray::get(FormatStr, true);
      Constant *GV = new GlobalVariable(Init->getType(), true,
                                        GlobalVariable::InternalLinkage,
                                        Init, "str",
                                        CI->getParent()->getParent()->getParent());
      // Cast GV to be a pointer to char.
      GV = ConstantExpr::getBitCast(GV, PointerType::get(Type::Int8Ty));
      new CallInst(SLC.get_puts(), GV, "", CI);

      if (CI->use_empty()) return ReplaceCallWith(CI, 0);
      // NOTE(review): FormatStr was shortened by erase() above, so this
      // returns strlen(fmt)-1 while printf would have returned strlen(fmt)
      // (the '\n' counts as a printed character). Looks off by one — confirm
      // and, if so, this should be FormatStr.size()+1 post-erase.
      return ReplaceCallWith(CI,
                             ConstantInt::get(CI->getType(), FormatStr.size()));
    }


    // Only support %c or "%s\n" for now.
    if (FormatStr.size() < 2 || FormatStr[0] != '%')
      return false;

    // Get the second character and switch on its value.
    switch (FormatStr[1]) {
    default: return false;
    case 's':
      if (FormatStr != "%s\n" || CI->getNumOperands() < 3 ||
          // TODO: could insert strlen call to compute string length.
          !CI->use_empty())
        return false;

      // printf("%s\n",str) -> puts(str)
      new CallInst(SLC.get_puts(), CastToCStr(CI->getOperand(2), CI),
                   CI->getName(), CI);
      return ReplaceCallWith(CI, 0);
    case 'c': {
      // printf("%c",c) -> putchar(c)
      if (FormatStr.size() != 2 || CI->getNumOperands() < 3)
        return false;

      // The argument must be an integer no wider than putchar's int parameter.
      Value *V = CI->getOperand(2);
      if (!isa<IntegerType>(V->getType()) ||
          cast<IntegerType>(V->getType())->getBitWidth() > 32)
        return false;

      V = CastInst::createZExtOrBitCast(V, Type::Int32Ty, CI->getName()+".int",
                                        CI);
      new CallInst(SLC.get_putchar(), V, "", CI);
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
    }
    }
  }
} PrintfOptimizer;

/// This LibCallOptimization will simplify calls to the "fprintf" library
/// function. It looks for cases where the result of fprintf is not used and the
/// operation can be reduced to something simpler.
/// @brief Simplify the fprintf library function.
struct VISIBILITY_HIDDEN FPrintFOptimization : public LibCallOptimization {
public:
  /// @brief Default Constructor
  FPrintFOptimization() : LibCallOptimization("fprintf",
      "Number of 'fprintf' calls simplified") {}

  /// @brief Make sure that the "fprintf" function has the right prototype
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    // int fprintf(FILE*, const char*, ...): pointer stream arg, i8* format,
    // integer return.
    const FunctionType *FT = F->getFunctionType();
    return FT->getNumParams() == 2 &&  // two fixed arguments.
           FT->getParamType(1) == PointerType::get(Type::Int8Ty) &&
           isa<PointerType>(FT->getParamType(0)) &&
           isa<IntegerType>(FT->getReturnType());
  }

  /// @brief Perform the fprintf optimization.
  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
    // Only handle calls with exactly 2 or 3 actual arguments (operand 0 is
    // the callee): fprintf(F,fmt) or fprintf(F,fmt,arg).
    if (CI->getNumOperands() != 3 && CI->getNumOperands() != 4)
      return false;

    // All the optimizations depend on the format string.
    std::string FormatStr;
    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
      return false;

    // If this is just a format string, turn it into fwrite.
    if (CI->getNumOperands() == 3) {
      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
        if (FormatStr[i] == '%')
          return false; // we found a format specifier

      // fprintf(file,fmt) -> fwrite(fmt,strlen(fmt),1,file)
      const Type *FILEty = CI->getOperand(1)->getType();

      Value *FWriteArgs[] = {
        CI->getOperand(2),
        ConstantInt::get(SLC.getIntPtrType(), FormatStr.size()),
        ConstantInt::get(SLC.getIntPtrType(), 1),
        CI->getOperand(1)
      };
      new CallInst(SLC.get_fwrite(FILEty), FWriteArgs, 4, CI->getName(), CI);
      // fprintf returns the number of characters written.
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(),
                                                  FormatStr.size()));
    }

    // The remaining optimizations require the format string to be length 2:
    // "%s" or "%c".
    if (FormatStr.size() != 2 || FormatStr[0] != '%')
      return false;

    // Get the second character and switch on its value.
    switch (FormatStr[1]) {
    case 'c': {
      // fprintf(file,"%c",c) -> fputc(c,file)
      const Type *FILETy = CI->getOperand(1)->getType();
      Value *C = CastInst::createZExtOrBitCast(CI->getOperand(3), Type::Int32Ty,
                                               CI->getName()+".int", CI);
      new CallInst(SLC.get_fputc(FILETy), C, CI->getOperand(1), "", CI);
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1));
    }
    case 's': {
      const Type *FILETy = CI->getOperand(1)->getType();

      // If the result of the fprintf call is used, we can't do this: fputs
      // does not return the character count.
      // TODO: we should insert a strlen call.
      if (!CI->use_empty())
        return false;

      // fprintf(file,"%s",str) -> fputs(str,file)
      new CallInst(SLC.get_fputs(FILETy), CastToCStr(CI->getOperand(3), CI),
                   CI->getOperand(1), CI->getName(), CI);
      return ReplaceCallWith(CI, 0);
    }
    default:
      return false;
    }
  }
} FPrintFOptimizer;

/// This LibCallOptimization will simplify calls to the "sprintf" library
/// function. It looks for cases where the result of sprintf is not used and the
/// operation can be reduced to something simpler.
/// @brief Simplify the sprintf library function.
struct VISIBILITY_HIDDEN SPrintFOptimization : public LibCallOptimization {
public:
  /// @brief Default Constructor
  SPrintFOptimization() : LibCallOptimization("sprintf",
      "Number of 'sprintf' calls simplified") {}

  /// @brief Make sure that the "sprintf" function has the right prototype
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    // int sprintf(char*, const char*, ...): both fixed params are i8*,
    // integer return.
    const FunctionType *FT = F->getFunctionType();
    return FT->getNumParams() == 2 &&  // two fixed arguments.
           FT->getParamType(1) == PointerType::get(Type::Int8Ty) &&
           FT->getParamType(0) == FT->getParamType(1) &&
           isa<IntegerType>(FT->getReturnType());
  }

  /// @brief Perform the sprintf optimization.
  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
    // Only handle calls with exactly 2 or 3 actual arguments (operand 0 is
    // the callee): sprintf(dst,fmt) or sprintf(dst,fmt,arg).
    if (CI->getNumOperands() != 3 && CI->getNumOperands() != 4)
      return false;

    std::string FormatStr;
    if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
      return false;

    if (CI->getNumOperands() == 3) {
      // Make sure there's no % in the constant array.
      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
        if (FormatStr[i] == '%')
          return false; // we found a format specifier

      // sprintf(str,fmt) -> llvm.memcpy(str,fmt,strlen(fmt),1)
      Value *MemCpyArgs[] = {
        CI->getOperand(1), CI->getOperand(2),
        ConstantInt::get(SLC.getIntPtrType(),
                         FormatStr.size()+1), // Copy the nul byte.
        ConstantInt::get(Type::Int32Ty, 1)
      };
      new CallInst(SLC.get_memcpy(), MemCpyArgs, 4, "", CI);
      // sprintf's result excludes the nul terminator.
      return ReplaceCallWith(CI, ConstantInt::get(CI->getType(),
                                                  FormatStr.size()));
    }

    // The remaining optimizations require the format string to be "%s" or "%c".
    if (FormatStr.size() != 2 || FormatStr[0] != '%')
      return false;

    // Get the second character and switch on its value.
    switch (FormatStr[1]) {
    case 'c': {
      // sprintf(dest,"%c",chr) -> store chr, dest  (plus nul terminator)
      Value *V = CastInst::createTruncOrBitCast(CI->getOperand(3),
                                                Type::Int8Ty, "char", CI);
      new StoreInst(V, CI->getOperand(1), CI);
      Value *Ptr = new GetElementPtrInst(CI->getOperand(1),
                                         ConstantInt::get(Type::Int32Ty, 1),
                                         CI->getOperand(1)->getName()+".end",
                                         CI);
      // sprintf always nul-terminates its output.
      new StoreInst(ConstantInt::get(Type::Int8Ty,0), Ptr, CI);
      return ReplaceCallWith(CI, ConstantInt::get(Type::Int32Ty, 1));
    }
    case 's': {
      // sprintf(dest,"%s",str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
      Value *Len = new CallInst(SLC.get_strlen(),
                                CastToCStr(CI->getOperand(3), CI),
                                CI->getOperand(3)->getName()+".len", CI);
      Value *UnincLen = Len;
      // +1 so the copy includes the nul terminator.
      Len = BinaryOperator::createAdd(Len, ConstantInt::get(Len->getType(), 1),
                                      Len->getName()+"1", CI);
      Value *MemcpyArgs[4] = {
        CI->getOperand(1),
        CastToCStr(CI->getOperand(3), CI),
        Len,
        ConstantInt::get(Type::Int32Ty, 1)
      };
      new CallInst(SLC.get_memcpy(), MemcpyArgs, 4, "", CI);

      // The strlen result is the unincremented number of bytes in the string,
      // which is exactly sprintf's return value.
      if (!CI->use_empty()) {
        if (UnincLen->getType() != CI->getType())
          UnincLen = CastInst::createIntegerCast(UnincLen, CI->getType(), false,
                                                 Len->getName(), CI);
        CI->replaceAllUsesWith(UnincLen);
      }
      return ReplaceCallWith(CI, 0);
    }
    }
    return false;
  }
} SPrintFOptimizer;

/// This LibCallOptimization will simplify calls to the "fputs" library
/// function. It looks for cases where the result of fputs is not used and the
/// operation can be reduced to something simpler.
/// @brief Simplify the fputs library function.
+struct VISIBILITY_HIDDEN FPutsOptimization : public LibCallOptimization { +public: + /// @brief Default Constructor + FPutsOptimization() : LibCallOptimization("fputs", + "Number of 'fputs' calls simplified") {} + + /// @brief Make sure that the "fputs" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + // Just make sure this has 2 arguments + return F->arg_size() == 2; + } + + /// @brief Perform the fputs optimization. + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // If the result is used, none of these optimizations work. + if (!CI->use_empty()) + return false; + + // All the optimizations depend on the length of the first argument and the + // fact that it is a constant string array. Check that now + std::string Str; + if (!GetConstantStringInfo(CI->getOperand(1), Str)) + return false; + + const Type *FILETy = CI->getOperand(2)->getType(); + // fputs(s,F) -> fwrite(s,1,len,F) (if s is constant and strlen(s) > 1) + Value *FWriteParms[4] = { + CI->getOperand(1), + ConstantInt::get(SLC.getIntPtrType(), Str.size()), + ConstantInt::get(SLC.getIntPtrType(), 1), + CI->getOperand(2) + }; + new CallInst(SLC.get_fwrite(FILETy), FWriteParms, 4, "", CI); + return ReplaceCallWith(CI, 0); // Known to have no uses (see above). + } +} FPutsOptimizer; + +/// This LibCallOptimization will simplify calls to the "fwrite" function. 
+struct VISIBILITY_HIDDEN FWriteOptimization : public LibCallOptimization { +public: + /// @brief Default Constructor + FWriteOptimization() : LibCallOptimization("fwrite", + "Number of 'fwrite' calls simplified") {} + + /// @brief Make sure that the "fputs" function has the right prototype + virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){ + const FunctionType *FT = F->getFunctionType(); + return FT->getNumParams() == 4 && + FT->getParamType(0) == PointerType::get(Type::Int8Ty) && + FT->getParamType(1) == FT->getParamType(2) && + isa<IntegerType>(FT->getParamType(1)) && + isa<PointerType>(FT->getParamType(3)) && + isa<IntegerType>(FT->getReturnType()); + } + + virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) { + // Get the element size and count. + uint64_t EltSize, EltCount; + if (ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(2))) + EltSize = C->getZExtValue(); + else + return false; + if (ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(3))) + EltCount = C->getZExtValue(); + else + return false; + + // If this is writing zero records, remove the call (it's a noop). + if (EltSize * EltCount == 0) + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 0)); + + // If this is writing one byte, turn it into fputc. + if (EltSize == 1 && EltCount == 1) { + // fwrite(s,1,1,F) -> fputc(s[0],F) + Value *Ptr = CI->getOperand(1); + Value *Val = new LoadInst(Ptr, Ptr->getName()+".byte", CI); + Val = new ZExtInst(Val, Type::Int32Ty, Val->getName()+".int", CI); + const Type *FILETy = CI->getOperand(4)->getType(); + new CallInst(SLC.get_fputc(FILETy), Val, CI->getOperand(4), "", CI); + return ReplaceCallWith(CI, ConstantInt::get(CI->getType(), 1)); + } + return false; + } +} FWriteOptimizer; + +/// This LibCallOptimization will simplify calls to the "isdigit" library +/// function. It simply does range checks the parameter explicitly. +/// @brief Simplify the isdigit library function. 
struct VISIBILITY_HIDDEN isdigitOptimization : public LibCallOptimization {
public:
  isdigitOptimization() : LibCallOptimization("isdigit",
      "Number of 'isdigit' calls simplified") {}

  /// @brief Make sure that the "isdigit" function has the right prototype
  virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& SLC){
    // Just make sure this has 1 argument.
    return (f->arg_size() == 1);
  }

  /// @brief Perform the isdigit optimization.
  virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &SLC) {
    if (ConstantInt* CI = dyn_cast<ConstantInt>(ci->getOperand(1))) {
      // isdigit(c) -> 0 or 1, if 'c' is constant: fold at compile time.
      uint64_t val = CI->getZExtValue();
      if (val >= '0' && val <= '9')
        return ReplaceCallWith(ci, ConstantInt::get(Type::Int32Ty, 1));
      else
        return ReplaceCallWith(ci, ConstantInt::get(Type::Int32Ty, 0));
    }

    // Non-constant argument:
    // isdigit(c) -> (unsigned)c - '0' <= 9
    // The unsigned subtract-and-compare folds both range bounds into one test.
    CastInst* cast = CastInst::createIntegerCast(ci->getOperand(1),
        Type::Int32Ty, false/*ZExt*/, ci->getOperand(1)->getName()+".uint", ci);
    BinaryOperator* sub_inst = BinaryOperator::createSub(cast,
        ConstantInt::get(Type::Int32Ty,0x30),  // 0x30 == '0'
        ci->getOperand(1)->getName()+".sub",ci);
    ICmpInst* setcond_inst = new ICmpInst(ICmpInst::ICMP_ULE,sub_inst,
        ConstantInt::get(Type::Int32Ty,9),
        ci->getOperand(1)->getName()+".cmp",ci);
    // Widen the i1 comparison result back to isdigit's int return type.
    CastInst* c2 = new ZExtInst(setcond_inst, Type::Int32Ty,
        ci->getOperand(1)->getName()+".isdigit", ci);
    return ReplaceCallWith(ci, c2);
  }
} isdigitOptimizer;

struct VISIBILITY_HIDDEN isasciiOptimization : public LibCallOptimization {
public:
  isasciiOptimization()
    : LibCallOptimization("isascii", "Number of 'isascii' calls simplified") {}

  /// @brief Make sure that the "isascii" function has the right prototype:
  /// one integer argument, integer return.
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    return F->arg_size() == 1 && F->arg_begin()->getType()->isInteger() &&
           F->getReturnType()->isInteger();
  }

  /// @brief Perform the isascii optimization.
  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
    // isascii(c) -> (unsigned)c < 128
    Value *V = CI->getOperand(1);
    Value *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, V,
                              ConstantInt::get(V->getType(), 128),
                              V->getName()+".isascii", CI);
    // Widen the i1 result if the call's return type is wider.
    if (Cmp->getType() != CI->getType())
      Cmp = new ZExtInst(Cmp, CI->getType(), Cmp->getName(), CI);
    return ReplaceCallWith(CI, Cmp);
  }
} isasciiOptimizer;


/// This LibCallOptimization will simplify calls to the "toascii" library
/// function. It simply does the corresponding and operation to restrict the
/// range of values to the ASCII character set (0-127).
/// @brief Simplify the toascii library function.
struct VISIBILITY_HIDDEN ToAsciiOptimization : public LibCallOptimization {
public:
  /// @brief Default Constructor
  ToAsciiOptimization() : LibCallOptimization("toascii",
      "Number of 'toascii' calls simplified") {}

  /// @brief Make sure that the "toascii" function has the right prototype
  virtual bool ValidateCalledFunction(const Function* f, SimplifyLibCalls& SLC){
    // Just make sure this has 1 argument.
    return (f->arg_size() == 1);
  }

  /// @brief Perform the toascii optimization.
  virtual bool OptimizeCall(CallInst *ci, SimplifyLibCalls &SLC) {
    // toascii(c) -> (c & 0x7f)
    Value *chr = ci->getOperand(1);
    Value *and_inst = BinaryOperator::createAnd(chr,
        ConstantInt::get(chr->getType(),0x7F),ci->getName()+".toascii",ci);
    return ReplaceCallWith(ci, and_inst);
  }
} ToAsciiOptimizer;

/// This LibCallOptimization will simplify calls to the "ffs" library
/// calls which find the first set bit in an int, long, or long long. The
/// optimization is to compute the result at compile time if the argument is
/// a constant.
/// @brief Simplify the ffs library function.
struct VISIBILITY_HIDDEN FFSOptimization : public LibCallOptimization {
protected:
  /// @brief Subclass Constructor (used by the ffsl/ffsll variants below)
  FFSOptimization(const char* funcName, const char* description)
    : LibCallOptimization(funcName, description) {}

public:
  /// @brief Default Constructor
  FFSOptimization() : LibCallOptimization("ffs",
      "Number of 'ffs' calls simplified") {}

  /// @brief Make sure that the "ffs" function has the right prototype
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    // One argument; ffs/ffsl/ffsll all return int (i32).
    return F->arg_size() == 1 && F->getReturnType() == Type::Int32Ty;
  }

  /// @brief Perform the ffs optimization.
  virtual bool OptimizeCall(CallInst *TheCall, SimplifyLibCalls &SLC) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(TheCall->getOperand(1))) {
      // ffs(cnst) -> bit#
      // ffsl(cnst) -> bit#
      // ffsll(cnst) -> bit#
      // Result is 1-based position of the lowest set bit, 0 when val == 0.
      uint64_t val = CI->getZExtValue();
      int result = 0;
      if (val) {
        ++result;
        while ((val & 1) == 0) {
          ++result;
          val >>= 1;
        }
      }
      return ReplaceCallWith(TheCall, ConstantInt::get(Type::Int32Ty, result));
    }

    // ffs(x)   -> x == 0 ? 0 : llvm.cttz(x)+1
    // ffsl(x)  -> x == 0 ? 0 : llvm.cttz(x)+1
    // ffsll(x) -> x == 0 ? 0 : llvm.cttz(x)+1
    const Type *ArgType = TheCall->getOperand(1)->getType();
    const char *CTTZName;
    assert(ArgType->getTypeID() == Type::IntegerTyID &&
           "llvm.cttz argument is not an integer?");
    // Pick the cttz intrinsic matching the argument width.
    unsigned BitWidth = cast<IntegerType>(ArgType)->getBitWidth();
    if (BitWidth == 8)
      CTTZName = "llvm.cttz.i8";
    else if (BitWidth == 16)
      CTTZName = "llvm.cttz.i16";
    else if (BitWidth == 32)
      CTTZName = "llvm.cttz.i32";
    else {
      assert(BitWidth == 64 && "Unknown bitwidth");
      CTTZName = "llvm.cttz.i64";
    }

    Constant *F = SLC.getModule()->getOrInsertFunction(CTTZName, ArgType,
                                                       ArgType, NULL);
    // NOTE(review): this casts the operand to its own type (ArgType), which
    // is a no-op as written — presumably kept for uniformity; confirm.
    Value *V = CastInst::createIntegerCast(TheCall->getOperand(1), ArgType,
                                           false/*ZExt*/, "tmp", TheCall);
    Value *V2 = new CallInst(F, V, "tmp", TheCall);
    // cttz result -> i32, then +1 to convert 0-based bit index to ffs's
    // 1-based position.
    V2 = CastInst::createIntegerCast(V2, Type::Int32Ty, false/*ZExt*/,
                                     "tmp", TheCall);
    V2 = BinaryOperator::createAdd(V2, ConstantInt::get(Type::Int32Ty, 1),
                                   "tmp", TheCall);
    // Select 0 for a zero input (cttz on 0 is not what ffs returns).
    Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, V,
                               Constant::getNullValue(V->getType()), "tmp",
                               TheCall);
    V2 = new SelectInst(Cond, ConstantInt::get(Type::Int32Ty, 0), V2,
                        TheCall->getName(), TheCall);
    return ReplaceCallWith(TheCall, V2);
  }
} FFSOptimizer;

/// This LibCallOptimization will simplify calls to the "ffsl" library
/// calls. It simply uses FFSOptimization for which the transformation is
/// identical.
/// @brief Simplify the ffsl library function.
struct VISIBILITY_HIDDEN FFSLOptimization : public FFSOptimization {
public:
  /// @brief Default Constructor
  FFSLOptimization() : FFSOptimization("ffsl",
      "Number of 'ffsl' calls simplified") {}

} FFSLOptimizer;

/// This LibCallOptimization will simplify calls to the "ffsll" library
/// calls. It simply uses FFSOptimization for which the transformation is
/// identical.
/// @brief Simplify the ffsll library function.
struct VISIBILITY_HIDDEN FFSLLOptimization : public FFSOptimization {
public:
  /// @brief Default Constructor
  FFSLLOptimization() : FFSOptimization("ffsll",
      "Number of 'ffsll' calls simplified") {}

} FFSLLOptimizer;

/// This optimizes unary functions that take and return doubles. It is the
/// shared base for the floor/ceil/round/rint/nearbyint optimizers below.
struct UnaryDoubleFPOptimizer : public LibCallOptimization {
  UnaryDoubleFPOptimizer(const char *Fn, const char *Desc)
    : LibCallOptimization(Fn, Desc) {}

  // Make sure that this function has the right prototype:
  // double f(double).
  virtual bool ValidateCalledFunction(const Function *F, SimplifyLibCalls &SLC){
    return F->arg_size() == 1 && F->arg_begin()->getType() == Type::DoubleTy &&
           F->getReturnType() == Type::DoubleTy;
  }

  /// ShrinkFunctionToFloatVersion - If the input to this function is really a
  /// float, strength reduce this to a float version of the function,
  /// e.g. floor((double)FLT) -> (double)floorf(FLT). This can only be called
  /// when the target supports the destination function and where there can be
  /// no precision loss.
  /// FP is a pointer-to-member getter on SimplifyLibCalls that returns the
  /// float-variant callee (e.g. &SimplifyLibCalls::get_floorf).
  static bool ShrinkFunctionToFloatVersion(CallInst *CI, SimplifyLibCalls &SLC,
                                           Constant *(SimplifyLibCalls::*FP)()){
    if (FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)))
      if (Cast->getOperand(0)->getType() == Type::FloatTy) {
        // Call the float version on the un-extended operand, then extend the
        // result back to double to keep the call's type unchanged.
        Value *New = new CallInst((SLC.*FP)(), Cast->getOperand(0),
                                  CI->getName(), CI);
        New = new FPExtInst(New, Type::DoubleTy, CI->getName(), CI);
        CI->replaceAllUsesWith(New);
        CI->eraseFromParent();
        // Drop the now-dead fpext if nothing else uses it.
        if (Cast->use_empty())
          Cast->eraseFromParent();
        return true;
      }
    return false;
  }
};


struct VISIBILITY_HIDDEN FloorOptimization : public UnaryDoubleFPOptimizer {
  FloorOptimization()
    : UnaryDoubleFPOptimizer("floor", "Number of 'floor' calls simplified") {}

  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
#ifdef HAVE_FLOORF
    // If this is a float argument passed in, convert to floorf.
    if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_floorf))
      return true;
#endif
    return false; // opt failed
  }
} FloorOptimizer;

struct VISIBILITY_HIDDEN CeilOptimization : public UnaryDoubleFPOptimizer {
  CeilOptimization()
    : UnaryDoubleFPOptimizer("ceil", "Number of 'ceil' calls simplified") {}

  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
#ifdef HAVE_CEILF
    // If this is a float argument passed in, convert to ceilf.
    if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_ceilf))
      return true;
#endif
    return false; // opt failed
  }
} CeilOptimizer;

struct VISIBILITY_HIDDEN RoundOptimization : public UnaryDoubleFPOptimizer {
  RoundOptimization()
    : UnaryDoubleFPOptimizer("round", "Number of 'round' calls simplified") {}

  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
#ifdef HAVE_ROUNDF
    // If this is a float argument passed in, convert to roundf.
    if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_roundf))
      return true;
#endif
    return false; // opt failed
  }
} RoundOptimizer;

struct VISIBILITY_HIDDEN RintOptimization : public UnaryDoubleFPOptimizer {
  RintOptimization()
    : UnaryDoubleFPOptimizer("rint", "Number of 'rint' calls simplified") {}

  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
#ifdef HAVE_RINTF
    // If this is a float argument passed in, convert to rintf.
    if (ShrinkFunctionToFloatVersion(CI, SLC, &SimplifyLibCalls::get_rintf))
      return true;
#endif
    return false; // opt failed
  }
} RintOptimizer;

struct VISIBILITY_HIDDEN NearByIntOptimization : public UnaryDoubleFPOptimizer {
  NearByIntOptimization()
    : UnaryDoubleFPOptimizer("nearbyint",
                             "Number of 'nearbyint' calls simplified") {}

  virtual bool OptimizeCall(CallInst *CI, SimplifyLibCalls &SLC) {
#ifdef HAVE_NEARBYINTF
    // If this is a float argument passed in, convert to nearbyintf.
    if (ShrinkFunctionToFloatVersion(CI, SLC,&SimplifyLibCalls::get_nearbyintf))
      return true;
#endif
    return false; // opt failed
  }
} NearByIntOptimizer;

/// GetConstantStringInfo - Compute the contents of a null-terminated constant
/// "C string" that V points into. This function can't rely on the size of the
/// underlying constant array because there could be a null terminator in the
/// middle of the array.
///
/// We also have to bail out if we find a non-integer constant initializer
/// of one of the elements or if there is no null-terminator. The logic
/// below checks each of these conditions and will return true only if all
/// conditions are met.
///
/// If successful, \p Str is set to the bytes of the string pointed to by V
/// (up to but not including the null terminator) and true is returned;
/// otherwise Str's final contents are unspecified and false is returned.
static bool GetConstantStringInfo(Value *V, std::string &Str) {
  // Look through noop bitcast instructions.
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
    if (BCI->getType() == BCI->getOperand(0)->getType())
      return GetConstantStringInfo(BCI->getOperand(0), Str);
    return false;
  }

  // If the value is not a GEP instruction nor a constant expression with a
  // GEP instruction, then return false because ConstantArray can't occur
  // any other way.
  User *GEP = 0;
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
    GEP = GEPI;
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() != Instruction::GetElementPtr)
      return false;
    GEP = CE;
  } else {
    return false;
  }

  // Make sure the GEP has exactly three arguments (pointer + two indices).
  if (GEP->getNumOperands() != 3)
    return false;

  // Check to make sure that the first operand of the GEP is an integer and
  // has value 0 so that we are sure we're indexing into the initializer.
  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
    if (!Idx->isZero())
      return false;
  } else
    return false;

  // If the second index isn't a ConstantInt, then this is a variable index
  // into the array. If this occurs, we can't say anything meaningful about
  // the string.
  uint64_t StartIdx = 0;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
    StartIdx = CI->getZExtValue();
  else
    return false;

  // The GEP instruction, constant or instruction, must reference a global
  // variable that is a constant and is initialized. The referenced constant
  // initializer is the array that we'll use for optimization.
  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
  if (!GV || !GV->isConstant() || !GV->hasInitializer())
    return false;
  Constant *GlobalInit = GV->getInitializer();

  // Handle the ConstantAggregateZero case.
  if (isa<ConstantAggregateZero>(GlobalInit)) {
    // This is a degenerate case. The initializer is constant zero so the
    // length of the string must be zero.
    Str.clear();
    return true;
  }

  // Must be a Constant Array.
  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
  if (!Array) return false;

  // Get the number of elements in the array.
  uint64_t NumElts = Array->getType()->getNumElements();

  // Traverse the constant array from StartIdx (derived above) which is
  // the place the GEP refers to in the array.
  // NOTE(review): `i` is unsigned while StartIdx/NumElts are uint64_t, so a
  // start index >= 2^32 would be truncated here — presumably never happens
  // in practice, but worth confirming.
  for (unsigned i = StartIdx; i < NumElts; ++i) {
    Constant *Elt = Array->getOperand(i);
    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
    if (!CI) // This array isn't suitable, non-int initializer.
      return false;
    if (CI->isZero())
      return true; // we found end of string, success!
    Str += (char)CI->getZExtValue();
  }

  return false; // The array isn't null terminated.
}

/// CastToCStr - Return V if it is already an i8* ("C string" pointer),
/// otherwise insert a bitcast of V to i8* before IP and return the cast.
/// @brief Cast a value to a "C" string.
static Value *CastToCStr(Value *V, Instruction *IP) {
  assert(isa<PointerType>(V->getType()) &&
         "Can't cast non-pointer type to C string type");
  const Type *SBPTy = PointerType::get(Type::Int8Ty);
  if (V->getType() != SBPTy)
    return new BitCastInst(V, SBPTy, V->getName(), IP);
  return V;
}

// TODO:
//   Additional cases that we need to add to this file:
//
// cbrt:
//   * cbrt(expN(X))  -> expN(x/3)
//   * cbrt(sqrt(x))  -> pow(x,1/6)
//   * cbrt(cbrt(x))  -> pow(x,1/9)
//
// cos, cosf, cosl:
//   * cos(-x)  -> cos(x)
//
// exp, expf, expl:
//   * exp(log(x))  -> x
//
// log, logf, logl:
//   * log(exp(x))   -> x
//   * log(x**y)     -> y*log(x)
//   * log(exp(y))   -> y*log(e)
//   * log(exp2(y))  -> y*log(2)
//   * log(exp10(y)) -> y*log(10)
//   * log(sqrt(x))  -> 0.5*log(x)
//   * log(pow(x,y)) -> y*log(x)
//
// lround, lroundf, lroundl:
//   * lround(cnst) -> cnst'
//
// memcmp:
//   * memcmp(x,y,l)   -> cnst
//      (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
//
// memmove:
//   * memmove(d,s,l,a) -> memcpy(d,s,l,a)
//       (if s is a global constant array)
//
// pow, powf, powl:
//   * pow(exp(x),y)  -> exp(x*y)
//   * pow(sqrt(x),y) -> pow(x,y*0.5)
//   * pow(pow(x,y),z)-> pow(x,y*z)
//
// puts:
//   * puts("") -> putchar("\n")
//
// round, roundf, roundl:
//   * round(cnst) -> cnst'
//
// signbit:
//   * signbit(cnst) -> cnst'
//   * signbit(nncst) -> 0 (if pstv is a non-negative constant)
//
// sqrt, sqrtf, sqrtl:
//   * sqrt(expN(x))  -> expN(x*0.5)
//   * sqrt(Nroot(x)) -> pow(x,1/(2*N))
//   * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
// stpcpy:
//   * stpcpy(str, "literal") ->
//           llvm.memcpy(str,"literal",strlen("literal")+1,1)
// strrchr:
//   * strrchr(s,c) ->
//       reverse_offset_of_in(c,s)
//      (if c is a constant integer and s is a constant string)
//   * strrchr(s1,0) -> strchr(s1,0)
//
// strncat:
//   * strncat(x,y,0) -> x
//   * strncat(x,y,0) -> x (if strlen(y) = 0)
//   * strncat(x,y,l) -> strcat(x,y) (if y and l are constants and l > strlen(y))
//
// strncpy:
//   * strncpy(d,s,0) -> d
//   * strncpy(d,s,l) -> memcpy(d,s,l,1)
//       (if s and l are constants)
//
// strpbrk:
//   * strpbrk(s,a) -> offset_in_for(s,a)
//       (if s and a are both constant strings)
//   * strpbrk(s,"") -> 0
//   * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
//
// strspn, strcspn:
//   * strspn(s,a)   -> const_int (if both args are constant)
//   * strspn("",a)  -> 0
//   * strspn(s,"")  -> 0
//   * strcspn(s,a)  -> const_int (if both args are constant)
//   * strcspn("",a) -> 0
//   * strcspn(s,"") -> strlen(a)
//
// strstr:
//   * strstr(x,x)   -> x
//   * strstr(s1,s2) -> offset_of_s2_in(s1)
//       (if s1 and s2 are constant strings)
//
// tan, tanf, tanl:
//   * tan(atan(x)) -> x
//
// trunc, truncf, truncl:
//   * trunc(cnst) -> cnst'
//
//
}
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
new file mode 100644
index 0000000..9851b26
--- /dev/null
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -0,0 +1,70 @@
//===-- StripDeadPrototypes.cpp - Remove unused function declarations -----===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass loops over all of the functions in the input module, looking for
// dead declarations, and removes them.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "strip-dead-prototypes" +#include "llvm/Transforms/IPO.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); + +namespace { + +/// @brief Pass to remove unused function declarations. +class VISIBILITY_HIDDEN StripDeadPrototypesPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesPass() : ModulePass((intptr_t)&ID) { } + virtual bool runOnModule(Module &M); +}; + +char StripDeadPrototypesPass::ID = 0; +RegisterPass<StripDeadPrototypesPass> X("strip-dead-prototypes", + "Strip Unused Function Prototypes"); + +} // end anonymous namespace + +bool StripDeadPrototypesPass::runOnModule(Module &M) { + bool MadeChange = false; + + // Erase dead function prototypes. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function *F = I++; + // Function must be a prototype and unused. + if (F->isDeclaration() && F->use_empty()) { + F->eraseFromParent(); + ++NumDeadPrototypes; + MadeChange = true; + } + } + + // Erase dead global var prototypes. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *GV = I++; + // Global must be a prototype and unused. + if (GV->isDeclaration() && GV->use_empty()) + GV->eraseFromParent(); + } + + // Return an indication of whether we changed anything or not. 
+ return MadeChange; +} + +ModulePass *llvm::createStripDeadPrototypesPass() { + return new StripDeadPrototypesPass(); +} diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp new file mode 100644 index 0000000..c8f8926 --- /dev/null +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -0,0 +1,206 @@ +//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements stripping symbols out of symbol tables. +// +// Specifically, this allows you to strip all of the symbols out of: +// * All functions in a module +// * All non-essential symbols in a module (all function symbols + all module +// scope symbols) +// * Debug information. +// +// Notice that: +// * This pass makes code much less readable, so it should only be used in +// situations where the 'strip' utility would be used (such as reducing +// code size, and making it harder to reverse engineer code). 
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Support/Compiler.h"
using namespace llvm;

namespace {
  /// StripSymbols - Module pass that erases symbol names and the llvm.dbg.*
  /// debug machinery.  When OnlyDebugInfo is true, symbol names are left
  /// untouched and only debug information is removed.
  class VISIBILITY_HIDDEN StripSymbols : public ModulePass {
    bool OnlyDebugInfo;   // If set, keep names; strip only debug info.
  public:
    static char ID; // Pass identification, replacement for typeid
    StripSymbols(bool ODI = false)
      : ModulePass((intptr_t)&ID), OnlyDebugInfo(ODI) {}

    virtual bool runOnModule(Module &M);

    // Only names and debug intrinsics are mutated, so all analyses survive.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
    }
  };

  char StripSymbols::ID = 0;
  RegisterPass<StripSymbols> X("strip", "Strip all symbols from a module");
}

/// createStripSymbolsPass - Public factory function for this pass.
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
  return new StripSymbols(OnlyDebugInfo);
}

/// RemoveDeadConstant - Erase the dead constant C, then recursively erase
/// any of C's operands that die as a result.  C must already be use-free.
static void RemoveDeadConstant(Constant *C) {
  assert(C->use_empty() && "Constant is not dead!");
  // Collect operands that will become dead once C is destroyed: those of
  // derived type whose only remaining use is C itself.
  std::vector<Constant*> Operands;
  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
    if (isa<DerivedType>(C->getOperand(i)->getType()) &&
        C->getOperand(i)->hasOneUse())
      Operands.push_back(C->getOperand(i));
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    if (!GV->hasInternalLinkage()) return;  // Don't delete non static globals.
    GV->eraseFromParent();
  }
  else if (!isa<Function>(C))
    // Non-global, non-function constants (e.g. ConstantExpr/aggregates) are
    // uniqued; destroyConstant() removes them from the uniquing tables.
    C->destroyConstant();

  // If the constant referenced anything, see if we can delete it as well.
  while (!Operands.empty()) {
    RemoveDeadConstant(Operands.back());
    Operands.pop_back();
  }
}

// Strip the symbol table of its names.
//
// StripSymtab - Clear the name of every value in the table that is allowed
// to lose it: everything except external (non-internal-linkage) globals,
// whose names are needed for linking.
static void StripSymtab(ValueSymbolTable &ST) {
  for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
    Value *V = VI->getValue();
    ++VI;  // Advance first: setName("") removes V's entry from the table.
    if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasInternalLinkage()) {
      // Set name to "", removing from symbol table!
      V->setName("");
    }
  }
}

// Strip the type symbol table of all of its entries (type names are purely
// cosmetic).
static void StripTypeSymtab(TypeSymbolTable &ST) {
  // remove() invalidates the erased iterator, so post-increment TI.
  for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; )
    ST.remove(TI++);
}



/// runOnModule - Strip symbol names (unless OnlyDebugInfo) and the llvm.dbg
/// intrinsic machinery from M.
/// NOTE(review): this always returns true, even on the early-exit path where
/// no debug intrinsics exist and OnlyDebugInfo suppressed name stripping --
/// conservative but imprecise; confirm callers tolerate the over-report.
bool StripSymbols::runOnModule(Module &M) {
  // If we're not just stripping debug info, strip all symbols from the
  // functions and the names from any internal globals.
  if (!OnlyDebugInfo) {
    for (Module::global_iterator I = M.global_begin(), E = M.global_end();
         I != E; ++I)
      if (I->hasInternalLinkage())
        I->setName("");     // Internal symbols can't participate in linkage
    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
      if (I->hasInternalLinkage())
        I->setName("");     // Internal symbols can't participate in linkage
      StripSymtab(I->getValueSymbolTable());
    }

    // Remove all names from types.
    StripTypeSymtab(M.getTypeSymbolTable());
  }

  // Strip debug info in the module if it exists.  To do this, we remove
  // llvm.dbg.func.start, llvm.dbg.stoppoint, and llvm.dbg.region.end calls, and
  // any globals they point to if now dead.
  Function *FuncStart = M.getFunction("llvm.dbg.func.start");
  Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
  Function *RegionStart = M.getFunction("llvm.dbg.region.start");
  Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
  Function *Declare = M.getFunction("llvm.dbg.declare");
  if (!FuncStart && !StopPoint && !RegionStart && !RegionEnd && !Declare)
    return true;

  std::vector<GlobalVariable*> DeadGlobals;

  // Remove all of the calls to the debugger intrinsics, and remove them from
  // the module.  Each loop below erases every call to one intrinsic, queues
  // the call's descriptor operand on DeadGlobals once it becomes use-free,
  // then erases the intrinsic declaration itself.  The getOperand index
  // differs per intrinsic (1, 3, 1, 1, 2 below) -- presumably matching the
  // position of the descriptor argument in each intrinsic's signature;
  // verify against the llvm.dbg intrinsic definitions.
  if (FuncStart) {
    while (!FuncStart->use_empty()) {
      CallInst *CI = cast<CallInst>(FuncStart->use_back());
      Value *Arg = CI->getOperand(1);
      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
      CI->eraseFromParent();
      if (Arg->use_empty())
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
          DeadGlobals.push_back(GV);
    }
    FuncStart->eraseFromParent();
  }
  if (StopPoint) {
    while (!StopPoint->use_empty()) {
      CallInst *CI = cast<CallInst>(StopPoint->use_back());
      Value *Arg = CI->getOperand(3);
      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
      CI->eraseFromParent();
      if (Arg->use_empty())
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
          DeadGlobals.push_back(GV);
    }
    StopPoint->eraseFromParent();
  }
  if (RegionStart) {
    while (!RegionStart->use_empty()) {
      CallInst *CI = cast<CallInst>(RegionStart->use_back());
      Value *Arg = CI->getOperand(1);
      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
      CI->eraseFromParent();
      if (Arg->use_empty())
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
          DeadGlobals.push_back(GV);
    }
    RegionStart->eraseFromParent();
  }
  if (RegionEnd) {
    while (!RegionEnd->use_empty()) {
      CallInst *CI = cast<CallInst>(RegionEnd->use_back());
      Value *Arg = CI->getOperand(1);
      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
      CI->eraseFromParent();
      if (Arg->use_empty())
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
          DeadGlobals.push_back(GV);
    }
    RegionEnd->eraseFromParent();
  }
  if (Declare) {
    while (!Declare->use_empty()) {
      CallInst *CI = cast<CallInst>(Declare->use_back());
      Value *Arg = CI->getOperand(2);
      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
      CI->eraseFromParent();
      if (Arg->use_empty())
        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
          DeadGlobals.push_back(GV);
    }
    Declare->eraseFromParent();
  }

  // Finally, delete any internal globals that were only used by the debugger
  // intrinsics.
  while (!DeadGlobals.empty()) {
    GlobalVariable *GV = DeadGlobals.back();
    DeadGlobals.pop_back();
    if (GV->hasInternalLinkage())
      RemoveDeadConstant(GV);
  }

  return true;
}