diff options
author | Daniel Berlin <dberlin@dberlin.org> | 2007-09-16 21:45:02 +0000 |
---|---|---|
committer | Daniel Berlin <dberlin@dberlin.org> | 2007-09-16 21:45:02 +0000 |
commit | 385bda604143bf1c0fe00bb7d524894ee237d770 (patch) | |
tree | f17c2665cc5bdfef3e962f07b490355d3e736061 /lib | |
parent | 319cd95ba898339254c961816d4ea5a3c8702c91 (diff) | |
download | external_llvm-385bda604143bf1c0fe00bb7d524894ee237d770.zip external_llvm-385bda604143bf1c0fe00bb7d524894ee237d770.tar.gz external_llvm-385bda604143bf1c0fe00bb7d524894ee237d770.tar.bz2 |
Rewrite of andersen's to be about 100x faster, cleaner, and begin to support field sensitivity
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42016 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Analysis/IPA/Andersens.cpp | 1011 |
1 files changed, 687 insertions, 324 deletions
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 4c0d246..fed2460 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -7,14 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file defines a very simple implementation of Andersen's interprocedural -// alias analysis. This implementation does not include any of the fancy -// features that make Andersen's reasonably efficient (like cycle elimination or -// variable substitution), but it should be useful for getting precision -// numbers and can be extended in the future. +// This file defines an implementation of Andersen's interprocedural alias +// analysis // // In pointer analysis terms, this is a subset-based, flow-insensitive, -// field-insensitive, and context-insensitive algorithm pointer algorithm. +// field-sensitive, and context-insensitive algorithm pointer algorithm. // // This algorithm is implemented as three stages: // 1. Object identification. @@ -29,24 +26,23 @@ // in the program by scanning the program, looking for pointer assignments and // other statements that effect the points-to graph. For a statement like "A = // B", this statement is processed to indicate that A can point to anything that -// B can point to. Constraints can handle copies, loads, and stores. +// B can point to. Constraints can handle copies, loads, and stores, and +// address taking. // // The inclusion constraint solving phase iteratively propagates the inclusion // constraints until a fixed point is reached. This is an O(N^3) algorithm. // -// In the initial pass, all indirect function calls are completely ignored. As -// the analysis discovers new targets of function pointers, it iteratively -// resolves a precise (and conservative) call graph. Also related, this -// analysis initially assumes that all internal functions have known incoming -// pointers. If we find that an internal function's address escapes outside of -// the program, we update this assumption. +// Function constraints are handled as if they were structs with X fields. +// Thus, an access to argument X of function Y is an access to node index +// getNode(Y) + X. This representation allows handling of indirect calls +// without any issues. To wit, an indirect call Y(a,b) is equivalence to +// *(Y + 1) = a, *(Y + 2) = b. +// The return node for a function is always located at getNode(F) + +// CallReturnPos. The arguments start at getNode(F) + CallArgPos. // // Future Improvements: -// This implementation of Andersen's algorithm is extremely slow. To make it -// scale reasonably well, the inclusion constraints could be sorted (easy), -// offline variable substitution would be a huge win (straight-forward), and -// online cycle elimination (trickier) might help as well. -// +// Offline variable substitution, offline detection of online +// cycles. Use of BDD's. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "anders-aa" @@ -62,31 +58,77 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SparseBitVector.h" #include <algorithm> #include <set> -using namespace llvm; +#include <list> +#include <stack> +#include <vector> +using namespace llvm; STATISTIC(NumIters , "Number of iterations to reach convergence"); STATISTIC(NumConstraints , "Number of constraints"); STATISTIC(NumNodes , "Number of nodes"); -STATISTIC(NumEscapingFunctions, "Number of internal functions that escape"); -STATISTIC(NumIndirectCallees , "Number of indirect callees found"); +STATISTIC(NumUnified , "Number of variables unified"); namespace { + const unsigned SelfRep = (unsigned)-1; + const unsigned Unvisited = (unsigned)-1; + // Position of the function return node relative to the function node. + const unsigned CallReturnPos = 2; + // Position of the function call node relative to the function node. + const unsigned CallFirstArgPos = 3; + class VISIBILITY_HIDDEN Andersens : public ModulePass, public AliasAnalysis, private InstVisitor<Andersens> { - public: - static char ID; // Class identification, replacement for typeinfo - Andersens() : ModulePass((intptr_t)&ID) {} - private: - /// Node class - This class is used to represent a memory object in the - /// program, and is the primitive used to build the points-to graph. - class Node { - std::vector<Node*> Pointees; - Value *Val; - public: - static const unsigned ID; // Pass identification, replacement for typeid - Node() : Val(0) {} + class Node; + + /// Constraint - Objects of this structure are used to represent the various + /// constraints identified by the algorithm. The constraints are 'copy', + /// for statements like "A = B", 'load' for statements like "A = *B", + /// 'store' for statements like "*A = B", and AddressOf for statements like + /// A = alloca; The Offset is applied as *(A + K) = B for stores, + /// A = *(B + K) for loads, and A = B + K for copies. It is + /// illegal on addressof constraints (Because it is statically + /// resolvable to A = &C where C = B + K) + + struct Constraint { + enum ConstraintType { Copy, Load, Store, AddressOf } Type; + unsigned Dest; + unsigned Src; + unsigned Offset; + + Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0) + : Type(Ty), Dest(D), Src(S), Offset(O) { + assert(Offset == 0 || Ty != AddressOf && + "Offset is illegal on addressof constraints"); + } + }; + + // Node class - This class is used to represent a node + // in the constraint graph. Due to various optimizations, + // not always the case that there is a mapping from a Node to a + // Value. In particular, we add artificial + // Node's that represent the set of pointed-to variables + // shared for each location equivalent Node. + struct Node { + Value *Val; + SparseBitVector<> *Edges; + SparseBitVector<> *PointsTo; + SparseBitVector<> *OldPointsTo; + bool Changed; + std::list<Constraint> Constraints; + + // Nodes in cycles (or in equivalence classes) are united + // together using a standard union-find representation with path + // compression. NodeRep gives the index into GraphNodes + // representative for this one. + unsigned NodeRep; public: + + Node() : Val(0), Edges(0), PointsTo(0), OldPointsTo(0), Changed(false), + NodeRep(SelfRep) { + } + Node *setValue(Value *V) { assert(Val == 0 && "Value already set for this node!"); Val = V; @@ -97,21 +139,11 @@ namespace { /// Value *getValue() const { return Val; } - typedef std::vector<Node*>::const_iterator iterator; - iterator begin() const { return Pointees.begin(); } - iterator end() const { return Pointees.end(); } - /// addPointerTo - Add a pointer to the list of pointees of this node, /// returning true if this caused a new pointer to be added, or false if /// we already knew about the points-to relation. - bool addPointerTo(Node *N) { - std::vector<Node*>::iterator I = std::lower_bound(Pointees.begin(), - Pointees.end(), - N); - if (I != Pointees.end() && *I == N) - return false; - Pointees.insert(I, N); - return true; + bool addPointerTo(unsigned Node) { + return PointsTo->test_and_set(Node); } /// intersects - Return true if the points-to set of this node intersects @@ -121,12 +153,7 @@ namespace { /// intersectsIgnoring - Return true if the points-to set of this node /// intersects with the points-to set of the specified node on any nodes /// except for the specified node to ignore. - bool intersectsIgnoring(Node *N, Node *Ignoring) const; - - // Constraint application methods. - bool copyFrom(Node *N); - bool loadFrom(Node *N); - bool storeThrough(Node *N); + bool intersectsIgnoring(Node *N, unsigned) const; }; /// GraphNodes - This vector is populated as part of the object @@ -152,41 +179,14 @@ namespace { /// take variable arguments. std::map<Function*, unsigned> VarargNodes; - /// Constraint - Objects of this structure are used to represent the various - /// constraints identified by the algorithm. The constraints are 'copy', - /// for statements like "A = B", 'load' for statements like "A = *B", and - /// 'store' for statements like "*A = B". - struct Constraint { - enum ConstraintType { Copy, Load, Store } Type; - Node *Dest, *Src; - - Constraint(ConstraintType Ty, Node *D, Node *S) - : Type(Ty), Dest(D), Src(S) {} - }; /// Constraints - This vector contains a list of all of the constraints /// identified by the program. std::vector<Constraint> Constraints; - /// EscapingInternalFunctions - This set contains all of the internal - /// functions that are found to escape from the program. If the address of - /// an internal function is passed to an external function or otherwise - /// escapes from the analyzed portion of the program, we must assume that - /// any pointer arguments can alias the universal node. This set keeps - /// track of those functions we are assuming to escape so far. - std::set<Function*> EscapingInternalFunctions; - - /// IndirectCalls - This contains a list of all of the indirect call sites - /// in the program. Since the call graph is iteratively discovered, we may - /// need to add constraints to our graph as we find new targets of function - /// pointers. - std::vector<CallSite> IndirectCalls; - - /// IndirectCallees - For each call site in the indirect calls list, keep - /// track of the callees that we have discovered so far. As the analysis - /// proceeds, more callees are discovered, until the call graph finally - /// stabilizes. - std::map<CallSite, std::vector<Function*> > IndirectCallees; + // Map from graph node to maximum K value that is allowed (For functions, + // this is equivalent to the number of arguments + CallFirstArgPos) + std::map<unsigned, unsigned> MaxK; /// This enum defines the GraphNodes indices that correspond to important /// fixed sets. @@ -195,8 +195,24 @@ namespace { NullPtr = 1, NullObject = 2 }; + // Stack for Tarjans + std::stack<unsigned> SCCStack; + // Topological Index -> Graph node + std::vector<unsigned> Topo2Node; + // Graph Node -> Topological Index; + std::vector<unsigned> Node2Topo; + // Map from Graph Node to DFS number + std::vector<unsigned> Node2DFS; + // Map from Graph Node to Deleted from graph. + std::vector<bool> Node2Deleted; + // Current DFS and RPO numbers + unsigned DFSNumber; + unsigned RPONumber; public: + static char ID; + Andersens() : ModulePass((intptr_t)&ID) {} + bool runOnModule(Module &M) { InitializeAliasAnalysis(this); IdentifyObjects(M); @@ -210,7 +226,6 @@ namespace { ObjectNodes.clear(); ReturnNodes.clear(); VarargNodes.clear(); - EscapingInternalFunctions.clear(); std::vector<Constraint>().swap(Constraints); return false; } @@ -255,7 +270,7 @@ namespace { private: /// getNode - Return the node corresponding to the specified pointer scalar. /// - Node *getNode(Value *V) { + unsigned getNode(Value *V) { if (Constant *C = dyn_cast<Constant>(V)) if (!isa<GlobalValue>(C)) return getNodeForConstantPointer(C); @@ -267,47 +282,55 @@ namespace { #endif assert(0 && "Value does not have a node in the points-to graph!"); } - return &GraphNodes[I->second]; + return I->second; } /// getObject - Return the node corresponding to the memory object for the /// specified global or allocation instruction. - Node *getObject(Value *V) { + unsigned getObject(Value *V) { std::map<Value*, unsigned>::iterator I = ObjectNodes.find(V); assert(I != ObjectNodes.end() && "Value does not have an object in the points-to graph!"); - return &GraphNodes[I->second]; + return I->second; } /// getReturnNode - Return the node representing the return value for the /// specified function. - Node *getReturnNode(Function *F) { + unsigned getReturnNode(Function *F) { std::map<Function*, unsigned>::iterator I = ReturnNodes.find(F); assert(I != ReturnNodes.end() && "Function does not return a value!"); - return &GraphNodes[I->second]; + return I->second; } /// getVarargNode - Return the node representing the variable arguments /// formal for the specified function. - Node *getVarargNode(Function *F) { + unsigned getVarargNode(Function *F) { std::map<Function*, unsigned>::iterator I = VarargNodes.find(F); assert(I != VarargNodes.end() && "Function does not take var args!"); - return &GraphNodes[I->second]; + return I->second; } /// getNodeValue - Get the node for the specified LLVM value and set the /// value for it to be the specified value. - Node *getNodeValue(Value &V) { - return getNode(&V)->setValue(&V); + unsigned getNodeValue(Value &V) { + unsigned Index = getNode(&V); + GraphNodes[Index].setValue(&V); + return Index; } + unsigned UniteNodes(unsigned First, unsigned Second); + unsigned FindNode(unsigned Node); + void IdentifyObjects(Module &M); void CollectConstraints(Module &M); + bool AnalyzeUsesOfFunction(Value *); + void CreateConstraintGraph(); void SolveConstraints(); + void QueryNode(unsigned Node); - Node *getNodeForConstantPointer(Constant *C); - Node *getNodeForConstantPointerTarget(Constant *C); - void AddGlobalInitializerConstraints(Node *N, Constant *C); + unsigned getNodeForConstantPointer(Constant *C); + unsigned getNodeForConstantPointerTarget(Constant *C); + void AddGlobalInitializerConstraints(unsigned, Constant *C); void AddConstraintsForNonInternalLinkage(Function *F); void AddConstraintsForCall(CallSite CS, Function *F); @@ -337,6 +360,7 @@ namespace { void visitSelectInst(SelectInst &SI); void visitVAArg(VAArgInst &I); void visitInstruction(Instruction &I); + }; char Andersens::ID = 0; @@ -353,12 +377,12 @@ ModulePass *llvm::createAndersensPass() { return new Andersens(); } AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { - Node *N1 = getNode(const_cast<Value*>(V1)); - Node *N2 = getNode(const_cast<Value*>(V2)); + Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))]; + Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))]; // Check to see if the two pointers are known to not alias. They don't alias // if their points-to sets do not intersect. - if (!N1->intersectsIgnoring(N2, &GraphNodes[NullObject])) + if (!N1->intersectsIgnoring(N2, NullObject)) return NoAlias; return AliasAnalysis::alias(V1, V1Size, V2, V2Size); @@ -376,14 +400,12 @@ Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) { // is, after all, a "research quality" implementation of Andersen's analysis. if (Function *F = CS.getCalledFunction()) if (F->isDeclaration()) { - Node *N1 = getNode(P); + Node *N1 = &GraphNodes[FindNode(getNode(P))]; - if (N1->begin() == N1->end()) - return NoModRef; // P doesn't point to anything. + if (N1->PointsTo->empty()) + return NoModRef; - // Get the first pointee. - Node *FirstPointee = *N1->begin(); - if (FirstPointee != &GraphNodes[UniversalSet]) + if (!N1->PointsTo->test(UniversalSet)) return NoModRef; // P doesn't point to the universal set. } @@ -401,30 +423,23 @@ Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { /// variables or any other memory memory objects because we do not track whether /// a pointer points to the beginning of an object or a field of it. void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { - Node *N = getNode(P); - Node::iterator I = N->begin(); - if (I != N->end()) { - // If there is exactly one element in the points-to set for the object... - ++I; - if (I == N->end()) { - Node *Pointee = *N->begin(); - - // If a function is the only object in the points-to set, then it must be - // the destination. Note that we can't handle global variables here, - // because we don't know if the pointer is actually pointing to a field of - // the global or to the beginning of it. - if (Value *V = Pointee->getValue()) { - if (Function *F = dyn_cast<Function>(V)) - RetVals.push_back(F); - } else { - // If the object in the points-to set is the null object, then the null - // pointer is a must alias. - if (Pointee == &GraphNodes[NullObject]) - RetVals.push_back(Constant::getNullValue(P->getType())); - } + Node *N = &GraphNodes[FindNode(getNode(P))]; + if (N->PointsTo->count() == 1) { + Node *Pointee = &GraphNodes[N->PointsTo->find_first()]; + // If a function is the only object in the points-to set, then it must be + // the destination. Note that we can't handle global variables here, + // because we don't know if the pointer is actually pointing to a field of + // the global or to the beginning of it. + if (Value *V = Pointee->getValue()) { + if (Function *F = dyn_cast<Function>(V)) + RetVals.push_back(F); + } else { + // If the object in the points-to set is the null object, then the null + // pointer is a must alias. + if (Pointee == &GraphNodes[NullObject]) + RetVals.push_back(Constant::getNullValue(P->getType())); } } - AliasAnalysis::getMustAliases(P, RetVals); } @@ -434,14 +449,20 @@ void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { /// return true. /// bool Andersens::pointsToConstantMemory(const Value *P) { - Node *N = getNode((Value*)P); - for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) { - if (Value *V = (*I)->getValue()) { + Node *N = &GraphNodes[FindNode(getNode((Value*)P))]; + unsigned i; + + for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); + bi != N->PointsTo->end(); + ++bi) { + i = *bi; + Node *Pointee = &GraphNodes[i]; + if (Value *V = Pointee->getValue()) { if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && !cast<GlobalVariable>(V)->isConstant())) return AliasAnalysis::pointsToConstantMemory(P); } else { - if (*I != &GraphNodes[NullObject]) + if (i != NullObject) return AliasAnalysis::pointsToConstantMemory(P); } } @@ -483,6 +504,7 @@ void Andersens::IdentifyObjects(Module &M) { // Add nodes for all of the functions and the instructions inside of them. for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { // The function itself is a memory object. + unsigned First = NumObjects; ValueNodes[F] = NumObjects++; ObjectNodes[F] = NumObjects++; if (isa<PointerType>(F->getFunctionType()->getReturnType())) @@ -490,11 +512,14 @@ void Andersens::IdentifyObjects(Module &M) { if (F->getFunctionType()->isVarArg()) VarargNodes[F] = NumObjects++; + // Add nodes for all of the incoming pointer arguments. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (isa<PointerType>(I->getType())) ValueNodes[I] = NumObjects++; + MaxK[First] = NumObjects - First; + MaxK[First + 1] = NumObjects - First - 1; // Scan the function body, creating a memory object for each heap/stack // allocation in the body of the function and a node to represent all @@ -521,11 +546,11 @@ void Andersens::IdentifyObjects(Module &M) { /// getNodeForConstantPointer - Return the node corresponding to the constant /// pointer itself. -Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { +unsigned Andersens::getNodeForConstantPointer(Constant *C) { assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C)) - return &GraphNodes[NullPtr]; + return NullPtr; else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return getNode(GV); else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -533,7 +558,7 @@ Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { case Instruction::GetElementPtr: return getNodeForConstantPointer(CE->getOperand(0)); case Instruction::IntToPtr: - return &GraphNodes[UniversalSet]; + return UniversalSet; case Instruction::BitCast: return getNodeForConstantPointer(CE->getOperand(0)); default: @@ -548,11 +573,11 @@ Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { /// getNodeForConstantPointerTarget - Return the node POINTED TO by the /// specified constant pointer. -Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { +unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); if (isa<ConstantPointerNull>(C)) - return &GraphNodes[NullObject]; + return NullObject; else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return getObject(GV); else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -560,7 +585,7 @@ Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { case Instruction::GetElementPtr: return getNodeForConstantPointerTarget(CE->getOperand(0)); case Instruction::IntToPtr: - return &GraphNodes[UniversalSet]; + return UniversalSet; case Instruction::BitCast: return getNodeForConstantPointerTarget(CE->getOperand(0)); default: @@ -575,19 +600,22 @@ Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { /// AddGlobalInitializerConstraints - Add inclusion constraints for the memory /// object N, which contains values indicated by C. -void Andersens::AddGlobalInitializerConstraints(Node *N, Constant *C) { +void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, + Constant *C) { if (C->getType()->isFirstClassType()) { if (isa<PointerType>(C->getType())) - N->copyFrom(getNodeForConstantPointer(C)); - + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + getNodeForConstantPointer(C))); } else if (C->isNullValue()) { - N->addPointerTo(&GraphNodes[NullObject]); + Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, + NullObject)); return; } else if (!isa<UndefValue>(C)) { // If this is an array or struct, include constraints for each element. assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - AddGlobalInitializerConstraints(N, cast<Constant>(C->getOperand(i))); + AddGlobalInitializerConstraints(NodeIndex, + cast<Constant>(C->getOperand(i))); } } @@ -600,7 +628,7 @@ void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { // If this is an argument of an externally accessible function, the // incoming pointer might point to anything. Constraints.push_back(Constraint(Constraint::Copy, getNode(I), - &GraphNodes[UniversalSet])); + UniversalSet)); } /// AddConstraintsForCall - If this is a call to a "known" function, add the @@ -653,14 +681,20 @@ bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { // These functions do induce points-to edges. - if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" || + if (F->getName() == "llvm.memcpy.i32" || F->getName() == "llvm.memcpy.i64" || F->getName() == "llvm.memmove.i32" ||F->getName() == "llvm.memmove.i64" || F->getName() == "memmove") { - // Note: this is a poor approximation, this says Dest = Src, instead of - // *Dest = *Src. - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CS.getArgument(0)), - getNode(CS.getArgument(1)))); + + // *Dest = *Src, which requires an artificial graph node to represent the + // constraint. It is broken up into *Dest = temp, temp = *Src + unsigned FirstArg = getNode(CS.getArgument(0)); + unsigned SecondArg = getNode(CS.getArgument(1)); + unsigned TempArg = GraphNodes.size(); + GraphNodes.push_back(Node()); + Constraints.push_back(Constraint(Constraint::Store, + FirstArg, TempArg)); + Constraints.push_back(Constraint(Constraint::Load, + TempArg, SecondArg)); return true; } @@ -679,49 +713,99 @@ bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { +/// AnalyzeUsesOfFunction - Look at all of the users of the specified function. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. +bool Andersens::AnalyzeUsesOfFunction(Value *V) { + + if (!isa<PointerType>(V->getType())) return true; + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (dyn_cast<LoadInst>(*UI)) { + return false; + } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (V == SI->getOperand(1)) { + return false; + } else if (SI->getOperand(1)) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (AnalyzeUsesOfFunction(GEP)) return true; + } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) + if (CI->getOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) + if (II->getOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfFunction(CE)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return true; // Allow comparison against null. + } else if (dyn_cast<FreeInst>(*UI)) { + return false; + } else { + return true; + } + return false; +} + /// CollectConstraints - This stage scans the program, adding a constraint to /// the Constraints list for each instruction in the program that induces a /// constraint, and setting up the initial points-to graph. /// void Andersens::CollectConstraints(Module &M) { // First, the universal set points to itself. - GraphNodes[UniversalSet].addPointerTo(&GraphNodes[UniversalSet]); - //Constraints.push_back(Constraint(Constraint::Load, &GraphNodes[UniversalSet], - // &GraphNodes[UniversalSet])); - Constraints.push_back(Constraint(Constraint::Store, &GraphNodes[UniversalSet], - &GraphNodes[UniversalSet])); + Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, + UniversalSet)); + Constraints.push_back(Constraint(Constraint::Store, UniversalSet, + UniversalSet)); // Next, the null pointer points to the null object. - GraphNodes[NullPtr].addPointerTo(&GraphNodes[NullObject]); + Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); // Next, add any constraints on global variables and their initializers. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { // Associate the address of the global object as pointing to the memory for // the global: &G = <G memory> - Node *Object = getObject(I); + unsigned ObjectIndex = getObject(I); + Node *Object = &GraphNodes[ObjectIndex]; Object->setValue(I); - getNodeValue(*I)->addPointerTo(Object); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), + ObjectIndex)); if (I->hasInitializer()) { - AddGlobalInitializerConstraints(Object, I->getInitializer()); + AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); } else { // If it doesn't have an initializer (i.e. it's defined in another // translation unit), it points to the universal set. - Constraints.push_back(Constraint(Constraint::Copy, Object, - &GraphNodes[UniversalSet])); + Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, + UniversalSet)); } } for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { // Make the function address point to the function object. - getNodeValue(*F)->addPointerTo(getObject(F)->setValue(F)); - + unsigned ObjectIndex = getObject(F); + GraphNodes[ObjectIndex].setValue(F); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*F), + ObjectIndex)); // Set up the return value node. if (isa<PointerType>(F->getFunctionType()->getReturnType())) - getReturnNode(F)->setValue(F); + GraphNodes[getReturnNode(F)].setValue(F); if (F->getFunctionType()->isVarArg()) - getVarargNode(F)->setValue(F); + GraphNodes[getVarargNode(F)].setValue(F); // Set up incoming argument nodes. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); @@ -729,7 +813,10 @@ void Andersens::CollectConstraints(Module &M) { if (isa<PointerType>(I->getType())) getNodeValue(*I); - if (!F->hasInternalLinkage()) + // At some point we should just add constraints for the escaping functions + // at solve time, but this slows down solving. For now, we simply mark + // address taken functions as escaping and treat them as external. + if (!F->hasInternalLinkage() || AnalyzeUsesOfFunction(F)) AddConstraintsForNonInternalLinkage(F); if (!F->isDeclaration()) { @@ -742,7 +829,7 @@ void Andersens::CollectConstraints(Module &M) { if (isa<PointerType>(F->getFunctionType()->getReturnType())) Constraints.push_back(Constraint(Constraint::Copy, getReturnNode(F), - &GraphNodes[UniversalSet])); + UniversalSet)); // Any pointers that are passed into the function have the universal set // stored into them. @@ -752,11 +839,11 @@ void Andersens::CollectConstraints(Module &M) { // Pointers passed into external functions could have anything stored // through them. Constraints.push_back(Constraint(Constraint::Store, getNode(I), - &GraphNodes[UniversalSet])); + UniversalSet)); // Memory objects passed into external function calls can have the // universal set point to them. Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], + UniversalSet, getNode(I))); } @@ -764,7 +851,7 @@ void Andersens::CollectConstraints(Module &M) { // into any pointers passed through the varargs section. if (F->getFunctionType()->isVarArg()) Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), - &GraphNodes[UniversalSet])); + UniversalSet)); } } NumConstraints += Constraints.size(); @@ -795,7 +882,10 @@ void Andersens::visitInstruction(Instruction &I) { } void Andersens::visitAllocationInst(AllocationInst &AI) { - getNodeValue(AI)->addPointerTo(getObject(&AI)->setValue(&AI)); + unsigned ObjectIndex = getObject(&AI); + GraphNodes[ObjectIndex].setValue(&AI); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), + ObjectIndex)); } void Andersens::visitReturnInst(ReturnInst &RI) { @@ -829,7 +919,7 @@ void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { void Andersens::visitPHINode(PHINode &PN) { if (isa<PointerType>(PN.getType())) { - Node *PNN = getNodeValue(PN); + unsigned PNN = getNodeValue(PN); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... Constraints.push_back(Constraint(Constraint::Copy, PNN, @@ -848,7 +938,7 @@ void Andersens::visitCastInst(CastInst &CI) { // P1 = cast int --> <Copy/P1/Univ> #if 0 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - &GraphNodes[UniversalSet])); + UniversalSet)); #else getNodeValue(CI); #endif @@ -857,7 +947,7 @@ void Andersens::visitCastInst(CastInst &CI) { // int = cast P1 --> <Copy/Univ/P1> #if 0 Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], + UniversalSet, getNode(CI.getOperand(0)))); #else getNode(CI.getOperand(0)); @@ -867,7 +957,7 @@ void Andersens::visitCastInst(CastInst &CI) { void Andersens::visitSelectInst(SelectInst &SI) { if (isa<PointerType>(SI.getType())) { - Node *SIN = getNodeValue(SI); + unsigned SIN = getNodeValue(SI); // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> Constraints.push_back(Constraint(Constraint::Copy, SIN, getNode(SI.getOperand(1)))); @@ -886,48 +976,72 @@ void Andersens::visitVAArg(VAArgInst &I) { /// the function pointer has been casted. If this is the case, do something /// reasonable. void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { - // If this is a call to an external function, handle it directly to get some - // taste of context sensitivity. - if (F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) + Value *CallValue = CS.getCalledValue(); + bool IsDeref = F == NULL; + + // If this is a call to an external function, try to handle it directly to get + // some taste of context sensitivity. + if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) return; if (isa<PointerType>(CS.getType())) { - Node *CSN = getNode(CS.getInstruction()); - if (isa<PointerType>(F->getFunctionType()->getReturnType())) { - Constraints.push_back(Constraint(Constraint::Copy, CSN, - getReturnNode(F))); + unsigned CSN = getNode(CS.getInstruction()); + if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) { + if (IsDeref) + Constraints.push_back(Constraint(Constraint::Load, CSN, + getNode(CallValue), CallReturnPos)); + else + Constraints.push_back(Constraint(Constraint::Copy, CSN, + getNode(CallValue) + CallReturnPos)); } else { // If the function returns a non-pointer value, handle this just like we // treat a nonpointer cast to pointer. Constraints.push_back(Constraint(Constraint::Copy, CSN, - &GraphNodes[UniversalSet])); + UniversalSet)); } - } else if (isa<PointerType>(F->getFunctionType()->getReturnType())) { + } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) { Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], - getReturnNode(F))); + UniversalSet, + getNode(CallValue) + CallReturnPos)); } - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); - for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) - if (isa<PointerType>(AI->getType())) { + if (F) { + // Direct Call + Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) + if (isa<PointerType>(AI->getType())) { + if (isa<PointerType>((*ArgI)->getType())) { + // Copy the actual argument into the formal argument. + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + getNode(*ArgI))); + } else { + Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), + UniversalSet)); + } + } else if (isa<PointerType>((*ArgI)->getType())) { + Constraints.push_back(Constraint(Constraint::Copy, + UniversalSet, + getNode(*ArgI))); + } + } else { + //Indirect Call + unsigned ArgPos = CallFirstArgPos; + for (; ArgI != ArgE; ++ArgI) { if (isa<PointerType>((*ArgI)->getType())) { // Copy the actual argument into the formal argument. - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - getNode(*ArgI))); + Constraints.push_back(Constraint(Constraint::Store, + getNode(CallValue), + getNode(*ArgI), ArgPos++)); } else { - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - &GraphNodes[UniversalSet])); + Constraints.push_back(Constraint(Constraint::Store, + getNode (CallValue), + UniversalSet, ArgPos++)); } - } else if (isa<PointerType>((*ArgI)->getType())) { - Constraints.push_back(Constraint(Constraint::Copy, - &GraphNodes[UniversalSet], - getNode(*ArgI))); } - + } // Copy all pointers passed through the varargs section to the varargs node. - if (F->getFunctionType()->isVarArg()) + if (F && F->getFunctionType()->isVarArg()) for (; ArgI != ArgE; ++ArgI) if (isa<PointerType>((*ArgI)->getType())) Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), @@ -942,9 +1056,7 @@ void Andersens::visitCallSite(CallSite CS) { if (Function *F = CS.getCalledFunction()) { AddConstraintsForCall(CS, F); } else { - // We don't handle indirect call sites yet. Keep track of them for when we - // discover the call graph incrementally. - IndirectCalls.push_back(CS); + AddConstraintsForCall(CS, NULL); } } @@ -955,74 +1067,109 @@ void Andersens::visitCallSite(CallSite CS) { /// intersects - Return true if the points-to set of this node intersects /// with the points-to set of the specified node. bool Andersens::Node::intersects(Node *N) const { - iterator I1 = begin(), I2 = N->begin(), E1 = end(), E2 = N->end(); - while (I1 != E1 && I2 != E2) { - if (*I1 == *I2) return true; - if (*I1 < *I2) - ++I1; - else - ++I2; - } - return false; + return PointsTo->intersects(N->PointsTo); } /// intersectsIgnoring - Return true if the points-to set of this node /// intersects with the points-to set of the specified node on any nodes /// except for the specified node to ignore. -bool Andersens::Node::intersectsIgnoring(Node *N, Node *Ignoring) const { - iterator I1 = begin(), I2 = N->begin(), E1 = end(), E2 = N->end(); - while (I1 != E1 && I2 != E2) { - if (*I1 == *I2) { - if (*I1 != Ignoring) return true; - ++I1; ++I2; - } else if (*I1 < *I2) - ++I1; +bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { + // TODO: If we are only going to call this with the same value for Ignoring, + // we should move the special values out of the points-to bitmap. + bool WeHadIt = PointsTo->test(Ignoring); + bool NHadIt = N->PointsTo->test(Ignoring); + bool Result = false; + if (WeHadIt) + PointsTo->reset(Ignoring); + if (NHadIt) + N->PointsTo->reset(Ignoring); + Result = PointsTo->intersects(N->PointsTo); + if (WeHadIt) + PointsTo->set(Ignoring); + if (NHadIt) + N->PointsTo->set(Ignoring); + return Result; +} + +// Create the constraint graph used for solving points-to analysis. +// +void Andersens::CreateConstraintGraph() { + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { + Constraint &C = Constraints[i]; + assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size()); + if (C.Type == Constraint::AddressOf) + GraphNodes[C.Dest].PointsTo->set(C.Src); + else if (C.Type == Constraint::Load) + GraphNodes[C.Src].Constraints.push_back(C); + else if (C.Type == Constraint::Store) + GraphNodes[C.Dest].Constraints.push_back(C); + else if (C.Offset != 0) + GraphNodes[C.Src].Constraints.push_back(C); else - ++I2; + GraphNodes[C.Src].Edges->set(C.Dest); } - return false; } -// Copy constraint: all edges out of the source node get copied to the -// destination node. This returns true if a change is made. -bool Andersens::Node::copyFrom(Node *N) { - // Use a mostly linear-time merge since both of the lists are sorted. - bool Changed = false; - iterator I = N->begin(), E = N->end(); - unsigned i = 0; - while (I != E && i != Pointees.size()) { - if (Pointees[i] < *I) { - ++i; - } else if (Pointees[i] == *I) { - ++i; ++I; - } else { - // We found a new element to copy over. - Changed = true; - Pointees.insert(Pointees.begin()+i, *I); - ++i; ++I; +// Perform cycle detection, DFS, and RPO finding. +void Andersens::QueryNode(unsigned Node) { + assert(GraphNodes[Node].NodeRep == SelfRep && "Querying a non-rep node"); + unsigned OurDFS = ++DFSNumber; + SparseBitVector<> ToErase; + SparseBitVector<> NewEdges; + Node2DFS[Node] = OurDFS; + + for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin(); + bi != GraphNodes[Node].Edges->end(); + ++bi) { + unsigned RepNode = FindNode(*bi); + // If we are going to add an edge to repnode, we have no need for the edge + // to e anymore. + if (RepNode != *bi && NewEdges.test(RepNode)){ + ToErase.set(*bi); + continue; } - } - if (I != E) { - Pointees.insert(Pointees.end(), I, E); - Changed = true; + // Continue about our DFS. + if (!Node2Deleted[RepNode]){ + if (Node2DFS[RepNode] == 0) { + QueryNode(RepNode); + // May have been changed by query + RepNode = FindNode(RepNode); + } + if (Node2DFS[RepNode] < Node2DFS[Node]) + Node2DFS[Node] = Node2DFS[RepNode]; + } + // We may have just discovered that e belongs to a cycle, in which case we + // can also erase it. + if (RepNode != *bi) { + ToErase.set(*bi); + NewEdges.set(RepNode); + } } - return Changed; -} + GraphNodes[Node].Edges->intersectWithComplement(ToErase); + GraphNodes[Node].Edges |= NewEdges; -bool Andersens::Node::loadFrom(Node *N) { - bool Changed = false; - for (iterator I = N->begin(), E = N->end(); I != E; ++I) - Changed |= copyFrom(*I); - return Changed; -} + // If this node is a root of a non-trivial SCC, place it on our worklist to be + // processed + if (OurDFS == Node2DFS[Node]) { + bool Changed = false; + while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= OurDFS) { + Node = UniteNodes(Node, FindNode(SCCStack.top())); + + SCCStack.pop(); + Changed = true; + } + Node2Deleted[Node] = true; + RPONumber++; -bool Andersens::Node::storeThrough(Node *N) { - bool Changed = false; - for (iterator I = begin(), E = end(); I != E; ++I) - Changed |= (*I)->copyFrom(N); - return Changed; + Topo2Node.at(GraphNodes.size() - RPONumber) = Node; + Node2Topo[Node] = GraphNodes.size() - RPONumber; + if (Changed) + GraphNodes[Node].Changed = true; + } else { + SCCStack.push(Node); + } } @@ -1033,73 +1180,257 @@ bool Andersens::Node::storeThrough(Node *N) { void Andersens::SolveConstraints() { bool Changed = true; unsigned Iteration = 0; - while (Changed) { - Changed = false; - ++NumIters; - DOUT << "Starting iteration #" << Iteration++ << "!\n"; - // Loop over all of the constraints, applying them in turn. - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - switch (C.Type) { - case Constraint::Copy: - Changed |= C.Dest->copyFrom(C.Src); - break; - case Constraint::Load: - Changed |= C.Dest->loadFrom(C.Src); - break; - case Constraint::Store: - Changed |= C.Dest->storeThrough(C.Src); - break; - default: - assert(0 && "Unknown constraint!"); - } + // We create the bitmaps here to avoid getting jerked around by the compiler + // creating objects behind our back and wasting lots of memory. + for (unsigned i = 0; i < GraphNodes.size(); ++i) { + Node *N = &GraphNodes[i]; + N->PointsTo = new SparseBitVector<>; + N->OldPointsTo = new SparseBitVector<>; + N->Edges = new SparseBitVector<>; + } + CreateConstraintGraph(); + + Topo2Node.insert(Topo2Node.begin(), GraphNodes.size(), Unvisited); + Node2Topo.insert(Node2Topo.begin(), GraphNodes.size(), Unvisited); + Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); + Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); + DFSNumber = 0; + RPONumber = 0; + // Order graph and mark starting nodes as changed. + for (unsigned i = 0; i < GraphNodes.size(); ++i) { + unsigned N = FindNode(i); + Node *INode = &GraphNodes[i]; + if (Node2DFS[N] == 0) { + QueryNode(N); + // Mark as changed if it's a representation and can contribute to the + // calculation right now. + if (INode->NodeRep == SelfRep && !INode->PointsTo->empty() + && (!INode->Edges->empty() || !INode->Constraints.empty())) + INode->Changed = true; } + } - if (Changed) { - // Check to see if any internal function's addresses have been passed to - // external functions. If so, we have to assume that their incoming - // arguments could be anything. If there are any internal functions in - // the universal node that we don't know about, we must iterate. - for (Node::iterator I = GraphNodes[UniversalSet].begin(), - E = GraphNodes[UniversalSet].end(); I != E; ++I) - if (Function *F = dyn_cast_or_null<Function>((*I)->getValue())) - if (F->hasInternalLinkage() && - EscapingInternalFunctions.insert(F).second) { - // We found a function that is just now escaping. Mark it as if it - // didn't have internal linkage. - AddConstraintsForNonInternalLinkage(F); - DOUT << "Found escaping internal function: " << F->getName() <<"\n"; - ++NumEscapingFunctions; - } + do { + Changed = false; - // Check to see if we have discovered any new callees of the indirect call - // sites. If so, add constraints to the analysis. - for (unsigned i = 0, e = IndirectCalls.size(); i != e; ++i) { - CallSite CS = IndirectCalls[i]; - std::vector<Function*> &KnownCallees = IndirectCallees[CS]; - Node *CN = getNode(CS.getCalledValue()); - - for (Node::iterator NI = CN->begin(), E = CN->end(); NI != E; ++NI) - if (Function *F = dyn_cast_or_null<Function>((*NI)->getValue())) { - std::vector<Function*>::iterator IP = - std::lower_bound(KnownCallees.begin(), KnownCallees.end(), F); - if (IP == KnownCallees.end() || *IP != F) { - // Add the constraints for the call now. - AddConstraintsForCall(CS, F); - DOUT << "Found actual callee '" - << F->getName() << "' for call: " - << *CS.getInstruction() << "\n"; - ++NumIndirectCallees; - KnownCallees.insert(IP, F); + ++NumIters; + DOUT << "Starting iteration #" << Iteration++ << "!\n"; + // TODO: In the microoptimization category, we could just make Topo2Node + // a fast map and thus only contain the visited nodes. + for (unsigned i = 0; i < GraphNodes.size(); ++i) { + unsigned CurrNodeIndex = Topo2Node[i]; + Node *CurrNode; + + // We may not revisit all nodes on every iteration + if (CurrNodeIndex == Unvisited) + continue; + CurrNode = &GraphNodes[CurrNodeIndex]; + // See if this is a node we need to process on this iteration + if (!CurrNode->Changed || CurrNode->NodeRep != SelfRep) + continue; + CurrNode->Changed = false; + + // Figure out the changed points to bits + SparseBitVector<> CurrPointsTo; + CurrPointsTo.intersectWithComplement(CurrNode->PointsTo, + CurrNode->OldPointsTo); + if (CurrPointsTo.empty()){ + continue; + } + *(CurrNode->OldPointsTo) |= CurrPointsTo; + + /* Now process the constraints for this node. */ + for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin(); + li != CurrNode->Constraints.end(); ) { + li->Src = FindNode(li->Src); + li->Dest = FindNode(li->Dest); + + // TODO: We could delete redundant constraints here. + // Src and Dest will be the vars we are going to process. + // This may look a bit ugly, but what it does is allow us to process + // both store and load constraints with the same function. + // Load constraints say that every member of our RHS solution has K + // added to it, and that variable gets an edge to LHS. We also union + // RHS+K's solution into the LHS solution. + // Store constraints say that every member of our LHS solution has K + // added to it, and that variable gets an edge from RHS. We also union + // RHS's solution into the LHS+K solution. + unsigned *Src; + unsigned *Dest; + unsigned K = li->Offset; + unsigned CurrMember; + if (li->Type == Constraint::Load) { + Src = &CurrMember; + Dest = &li->Dest; + } else if (li->Type == Constraint::Store) { + Src = &li->Src; + Dest = &CurrMember; + } else { + // TODO Handle offseted copy constraint + li++; + continue; + } + // TODO: hybrid cycle detection would go here, we should check + // if it was a statically detected offline equivalence that + // involves pointers , and if so, remove the redundant constraints. + + const SparseBitVector<> &Solution = CurrPointsTo; + + for (SparseBitVector<>::iterator bi = Solution.begin(); + bi != Solution.end(); + ++bi) { + CurrMember = *bi; + + // Need to increment the member by K since that is where we are + // supposed to copy to/from + // Node that in positive weight cycles, which occur in address taking + // of fields, K can go past + // MaxK[CurrMember] elements, even though that is all it could + // point to. + if (K > 0 && K > MaxK[CurrMember]) + continue; + else + CurrMember = FindNode(CurrMember + K); + + // Add an edge to the graph, so we can just do regular bitmap ior next + // time. It may also let us notice a cycle. + if (!GraphNodes[*Src].Edges->test_and_set(*Dest)) { + if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) { + GraphNodes[*Dest].Changed = true; + // If we changed a node we've already processed, we need another + // iteration. + if (Node2Topo[*Dest] <= i) + Changed = true; } } + } + li++; + } + SparseBitVector<> NewEdges; + SparseBitVector<> ToErase; + + // Now all we have left to do is propagate points-to info along the + // edges, erasing the redundant edges. + + + for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin(); + bi != CurrNode->Edges->end(); + ++bi) { + + unsigned DestVar = *bi; + unsigned Rep = FindNode(DestVar); + + // If we ended up with this node as our destination, or we've already + // got an edge for the representative, delete the current edge. + if (Rep == CurrNodeIndex || + (Rep != DestVar && NewEdges.test(Rep))) { + ToErase.set(DestVar); + continue; + } + // Union the points-to sets into the dest + if (GraphNodes[Rep].PointsTo |= CurrPointsTo) { + GraphNodes[Rep].Changed = true; + if (Node2Topo[Rep] <= i) + Changed = true; + } + // If this edge's destination was collapsed, rewrite the edge. + if (Rep != DestVar) { + ToErase.set(DestVar); + NewEdges.set(Rep); + } + } + CurrNode->Edges->intersectWithComplement(ToErase); + CurrNode->Edges |= NewEdges; + } + if (Changed) { + DFSNumber = RPONumber = 0; + Node2Deleted.clear(); + Topo2Node.clear(); + Node2Topo.clear(); + Node2DFS.clear(); + Topo2Node.insert(Topo2Node.begin(), GraphNodes.size(), Unvisited); + Node2Topo.insert(Node2Topo.begin(), GraphNodes.size(), Unvisited); + Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); + Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); + // Rediscover the DFS/Topo ordering, and cycle detect. + for (unsigned j = 0; j < GraphNodes.size(); j++) { + unsigned JRep = FindNode(j); + if (Node2DFS[JRep] == 0) + QueryNode(JRep); } } + + } while (Changed); + + Node2Topo.clear(); + Topo2Node.clear(); + Node2DFS.clear(); + Node2Deleted.clear(); + for (unsigned i = 0; i < GraphNodes.size(); ++i) { + Node *N = &GraphNodes[i]; + delete N->OldPointsTo; + delete N->Edges; } } +//===----------------------------------------------------------------------===// +// Union-Find +//===----------------------------------------------------------------------===// + +// Unite nodes First and Second, returning the one which is now the +// representative node. First and Second are indexes into GraphNodes +unsigned Andersens::UniteNodes(unsigned First, unsigned Second) { + assert (First < GraphNodes.size() && Second < GraphNodes.size() && + "Attempting to merge nodes that don't exist"); + // TODO: implement union by rank + Node *FirstNode = &GraphNodes[First]; + Node *SecondNode = &GraphNodes[Second]; + + assert (SecondNode->NodeRep == SelfRep && FirstNode->NodeRep == SelfRep && + "Trying to unite two non-representative nodes!"); + if (First == Second) + return First; + + SecondNode->NodeRep = First; + FirstNode->Changed |= SecondNode->Changed; + FirstNode->PointsTo |= *(SecondNode->PointsTo); + FirstNode->Edges |= *(SecondNode->Edges); + FirstNode->Constraints.splice(FirstNode->Constraints.begin(), + SecondNode->Constraints); + delete FirstNode->OldPointsTo; + FirstNode->OldPointsTo = new SparseBitVector<>; + + // Destroy interesting parts of the merged-from node. + delete SecondNode->OldPointsTo; + delete SecondNode->Edges; + delete SecondNode->PointsTo; + SecondNode->Edges = NULL; + SecondNode->PointsTo = NULL; + SecondNode->OldPointsTo = NULL; + + NumUnified++; + DOUT << "Unified Node "; + DEBUG(PrintNode(FirstNode)); + DOUT << " and Node "; + DEBUG(PrintNode(SecondNode)); + DOUT << "\n"; + + // TODO: Handle SDT + return First; +} +// Find the index into GraphNodes of the node representing Node, performing +// path compression along the way +unsigned Andersens::FindNode(unsigned NodeIndex) { + assert (NodeIndex < GraphNodes.size() + && "Attempting to find a node that can't exist"); + Node *N = &GraphNodes[NodeIndex]; + if (N->NodeRep == SelfRep) + return NodeIndex; + else + return (N->NodeRep = FindNode(N->NodeRep)); +} //===----------------------------------------------------------------------===// // Debugging Output @@ -1116,15 +1447,20 @@ void Andersens::PrintNode(Node *N) { cerr << "<null>"; return; } + if (!N->getValue()) { + cerr << "artificial" << (intptr_t) N; + return; + } assert(N->getValue() != 0 && "Never set node label!"); Value *V = N->getValue(); if (Function *F = dyn_cast<Function>(V)) { if (isa<PointerType>(F->getFunctionType()->getReturnType()) && - N == getReturnNode(F)) { + N == &GraphNodes[getReturnNode(F)]) { cerr << F->getName() << ":retval"; return; - } else if (F->getFunctionType()->isVarArg() && N == getVarargNode(F)) { + } else if (F->getFunctionType()->isVarArg() && + N == &GraphNodes[getVarargNode(F)]) { cerr << F->getName() << ":vararg"; return; } @@ -1141,22 +1477,36 @@ void Andersens::PrintNode(Node *N) { cerr << "(unnamed)"; if (isa<GlobalValue>(V) || isa<AllocationInst>(V)) - if (N == getObject(V)) + if (N == &GraphNodes[getObject(V)]) cerr << "<mem>"; } void Andersens::PrintConstraints() { cerr << "Constraints:\n"; + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - cerr << " #" << i << ": "; - Constraint &C = Constraints[i]; - if (C.Type == Constraint::Store) + const Constraint &C = Constraints[i]; + if (C.Type == Constraint::Store) { cerr << "*"; - PrintNode(C.Dest); + if (C.Offset != 0) + cerr << "("; + } + PrintNode(&GraphNodes[C.Dest]); + if (C.Type == Constraint::Store && C.Offset != 0) + cerr << " + " << C.Offset << ")"; cerr << " = "; - if (C.Type == Constraint::Load) + if (C.Type == Constraint::Load) { cerr << "*"; - PrintNode(C.Src); + if (C.Offset != 0) + cerr << "("; + } + else if (C.Type == Constraint::AddressOf) + cerr << "&"; + PrintNode(&GraphNodes[C.Src]); + if (C.Offset != 0 && C.Type != Constraint::Store) + cerr << " + " << C.Offset; + if (C.Type == Constraint::Load && C.Offset != 0) + cerr << ")"; cerr << "\n"; } } @@ -1165,13 +1515,26 @@ void Andersens::PrintPointsToGraph() { cerr << "Points-to graph:\n"; for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { Node *N = &GraphNodes[i]; - cerr << "[" << (N->end() - N->begin()) << "] "; - PrintNode(N); - cerr << "\t--> "; - for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) { - if (I != N->begin()) cerr << ", "; - PrintNode(*I); + if (FindNode (i) != i) { + PrintNode(N); + cerr << "\t--> same as "; + PrintNode(&GraphNodes[FindNode(i)]); + cerr << "\n"; + } else { + cerr << "[" << (N->PointsTo->count()) << "] "; + PrintNode(N); + cerr << "\t--> "; + + bool first = true; + for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); + bi != N->PointsTo->end(); + ++bi) { + if (!first) + cerr << ", "; + PrintNode(&GraphNodes[*bi]); + first = false; + } + cerr << "\n"; } - cerr << "\n"; } } |